diff --git a/README.rst b/README.rst index ac3c3b500..cf758b0c7 100644 --- a/README.rst +++ b/README.rst @@ -1,14 +1,31 @@ -Managing single-cell data sets and neural networks used for analysis -===================================================================== +|Stars| |PyPI| |PyPIDownloads| -.. image:: https://github.com/theislab/sfaira/blob/master/resources/images/concept.jpeg - :width: 600px +.. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow + :target: https://github.com/theislab/sfaira/stargazers +.. |PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI + :target: https://pypi.org/project/sfaira +.. |PyPIDownloads| image:: https://pepy.tech/badge/sfaira + :target: https://pepy.tech/project/sfaira + + +sfaira - data and model repository for single-cell data +======================================================= + +.. image:: https://github.com/theislab/sfaira/blob/master/resources/images/concept.png + :width: 1000px :align: center sfaira_ is a model and a data repository in a single python package. -Its data API gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. -Its model API gives user streamlined access to pre-trained models and to common model architectures to ease usage of neural networks in common single-cell analysis workflows. +We provide an interactive overview of the current state of the zoos on sfaira-site_. + +Its data zoo gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. 
+Its model zoo gives user streamlined access to pre-trained models and to common model architectures to ease usage of neural networks in common single-cell analysis workflows: +A model zoo is a software infrastructure that improves user access to pre-trained models which are separately published, such as DCA_ or scArches_: +Instead of focussing on developing new models, we focus on making models easily accessible to users and distributable by developers. sfaira integrates into scanpy_ workflows. .. _scanpy: https://github.com/theislab/scanpy .. _sfaira: https://sfaira.readthedocs.io +.. _DCA: https://github.com/theislab/dca +.. _scArches: https://github.com/theislab/scarches +.. _sfaira-site: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 000000000..0230f0b1a --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,154 @@ +.. module:: sfaira +.. automodule:: sfaira + :noindex: + +API +=== + +Import sfaira as:: + + import sfaira + + + +Data: `data` +------------ + +.. module:: sfaira.data +.. currentmodule:: sfaira + +The sfaira data zoo API. + + +Pre-defined data set collections +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to curated subsets of the data zoo, e.g. all data sets from human lungs. + +.. autosummary:: + :toctree: . + + data.human + data.mouse + + +Functionalities for interactive data analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to functionalities you need to define your own data set collections based on the sfaira data zoo. + +.. autosummary:: + :toctree: . + + data.DatasetBase + data.DatasetGroupBase + data.DatasetSuperGroup + + +Functionalities for interactive data analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to functionalities you need to load new data live into the data zoo to handle a raw data set in the context of zoo data sets. + +.. 
autosummary:: + :toctree: . + + data.DatasetInteractive + + +Genomes: `genomes` +------------------ + +.. module:: sfaira.genomes +.. currentmodule:: sfaira + +This sub-module gives you access to properties of the genome representations used in sfaira. + +.. autosummary:: + :toctree: . + + genomes.ExtractFeatureListEnsemble + + +Models: `models` +---------------- + +.. module:: sfaira.models +.. currentmodule:: sfaira + +The sfaira model zoo API for advanced use. +This API is structured by streamlined, task-specific APIs for specific analysis problems. +This API is targeted at developers, see also `ui` for a user centric wrapping API for this model zoo. + + +Cell-type predictor models +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module handles models that predict cell types. + +.. autosummary:: + :toctree: . + + models.celltype + + +Embedding models +~~~~~~~~~~~~~~~~ + +This sub-module handles models that embed expression vectors (cells) into a latent space. + +.. autosummary:: + :toctree: . + + models.embedding + + +Train: `train` +-------------- + +.. module:: sfaira.train +.. currentmodule:: sfaira + +The interface for training sfaira compatible models. +This is a sub-module dedicated for developers to ease model training and deployment. + +Trainer classes +~~~~~~~~~~~~~~~ + +Trainer class wrap estimator classes (which wrap model classes) and handle grid-search specific tasks centred on model fits, +such as saving evaluation metrics and model weights. + +.. autosummary:: + :toctree: . + + train.TargetZoos + train.TrainModelCelltype + train.TrainModelEmbedding + + +Grid search summary classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Grid search summary classes allow a developer to easily interact with a finished grid search by loading and summarising results, +which were saved through Trainer classes. + +.. autosummary:: + :toctree: . 
+ + train.GridsearchContainer + train.SummarizeGridsearchCelltype + train.SummarizeGridsearchEmbedding + +User interface: `ui` +-------------------- + +.. module:: sfaira.ui +.. currentmodule:: sfaira + +This sub-module gives users access to the model zoo, including model query from remote servers. +This API is designed to be used in analysis workflows and does not require any understanding of the way models are defined and stored. + +.. autosummary:: + :toctree: . + + ui.UserInterface diff --git a/docs/data.rst b/docs/data.rst index 1cefc1745..21ac5972c 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -1,22 +1,46 @@ Data ====== +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png + :width: 600px + :align: center + Build data repository locally ------------------------------ -Build a repository structure: -1. Choose a directory to dedicate to the data base, called root in the following. -2. Make subfolders in root for each organism for which you want to build a data base. -3. Make subfolders for each organ whithin each organism for which you want to build a data base. +Build a repository structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 1. Choose a directory to dedicate to the data base, called root in the following. + 2. Make subfolders in root for each organism for which you want to build a data base. + 3. Make subfolders for each organ whithin each organism for which you want to build a data base. + +We maintain a couple of download scripts that automatise this process, which have to be executed in a shell once to download specific subsets of the full data zoo. +These scripts can be found in sfaira.data.download_scripts. + +Use 3rd party repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some organization provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +One example for such an organization is the cellxgene_ data portal. 
+Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon! +Contact us for support of any other repositories. + +.. _cellxgene: https://cellxgene.cziscience.com/ + +Add data sets +~~~~~~~~~~~~~ -Add data sets: -4. For each species and organ combination, choose the data sets that you want to use. -5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data -using the described processing if this is required: This is usually done to speed up loading for file -formats that are difficult to access. + 4. For each species and organ combination, choose the data sets that you want to use. + 5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data + using the described processing if this is required: This is usually done to speed up loading for file + formats that are difficult to access. + +Data loaders +------------ Use data loaders on existing data repository --------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You only want to use data sets with existing data loaders and have adapted your directory structure as above? In that case, you can immediately start using the data loader functions, you just need to supply the root directory @@ -25,10 +49,8 @@ Depending on the functionalities you want to use, you need to create a directory can be easily done via the data set api itself, example python scripts are under benchmarks/data_preparation. This meta information is necessary to anticipate file sizes for backing merged adata objects for example. -TODO example. - Contribute data loaders ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. 
Each such class is in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class @@ -74,7 +96,7 @@ before it is loaded into memory: if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "human/eye/my_data.h5ad") defined file in streamlined directory structure + fn = os.path.join(self.path, "human", "eye", "my_data.h5ad") defined file in streamlined directory structure self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad self.adata.uns["lab"] = x # load the adata.uns with meta data @@ -108,13 +130,59 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. - -Handling ontologies in data loaders ------------------------------------ - -Each data loader has a versioned cell type annotation map, a dictionary. -This dictionary allows mapping of the cell type annotations that come with the raw form of the data set to the cell type -universe or ontology terms defined in sfaira, this is, however, only done upon loading of the data (.load()). -The outcome of this map is a new set of cell type labels that can be propagated to leave nodes of the ontology graph. -This dictionary requires a new entry for each new version of the corresponding cell type universe. - +Ontology management +------------------- + +Sfaira maintains versioned cell type universes and ontologies by species and organ. +A cell type universe is a list of the unique, most fine-grained cell type definitions available. +These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, +an ontology ID. 
+Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality +does not allow to distinguish between T cell subtypes. +To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again +defined by a cell type identifier. +Such a hierarchy can be written as a directed acyclic graph which has the cell type universe as its leaf nodes. +Intuitively, the cell type hierarchy graph depends on the cell type universe. +Accordingly, both are versioned together in sfaira: +Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an +incrementation in both of their versions. +These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the +file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available +even after updates. +This versioning without deprecation of the old objects allows sfaira to execute and train models that were designed +for older cell type universes and thus ensures reproducibility. + +Contribute cell types to ontologies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To contribute new cell types or change existing cell type universe entries, the cell type universe version has to be +incremented and the new entry can simply be added to the list or modified in the list. +We do not increment the universe version if a change does not influence the identity of a leaf node with respect to +the other types in the universe, i.e. if it simply changes the spelling of a cell type or if an ontology ID is added to +a type that previously did not have one.
+ +Contribute hierarchies to ontologies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set +of the leaf nodes (cell type universe) of the corresponding universe version. + + +Using ontologies to train cell type classifiers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cell type classifiers can be trained on data sets with different coarseness of cell type annotation using aggregate +cross-entropy as a loss and aggregate accuracy as a metric. +The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms +that correspond to intermediate nodes (rather than leaf nodes) are encountered in the label set. + +Genome management +----------------- + +We streamline feature spaces used by models by defining standardized gene sets that are used as model input. +Per default, sfaira works with the protein coding genes of a genome assembly right now. +A model topology version includes the genome it was trained for, which also defines the features of this model as genes. +As genome assemblies are updated, model topology versions can be updated and models retrained to reflect these changes. +Note that because protein coding genes do not change drastically between genome assemblies, +samples can be carried over to assemblies they were not aligned against by matching gene identifiers. +Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst new file mode 100644 index 000000000..aca80b451 --- /dev/null +++ b/docs/ecosystem.rst @@ -0,0 +1,61 @@ +Ecosystem +========= + +scanpy +------ + +scanpy_ provides an environment of tools that can be used to analyse single-cell data in python. +sfaira allows users to easily query third party data sets and models to complement these analysis workflows. + +..
_scanpy: https://github.com/theislab/scanpy + +Data zoo +-------- + +Data providers which streamline data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some organization provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +Examples for such data providers are: + + - Human Cell Atlas data portal (HCA DCP_) + - cellxgene_ data portal + - Broad_ institute single cell data portal + - EBI_ single cell expression atlas + +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon, we are working on interfacing more such organisations! +Contact us for support of any other repositories. + +.. _DCP: https://data.humancellatlas.org/explore/ +.. _cellxgene: https://cellxgene.cziscience.com/ +.. _Broad: https://singlecell.broadinstitute.org/single_cell +.. _EBI: https://www.ebi.ac.uk/gxa/sc/home + + +Study-centric data set servers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many authors of data sets provide their data sets on servers: + + - GEO_ + - cloud storage servers + - manuscript supplements + +Our data zoo interface is able to represent these data sets such that they can be queried in a streamlined fashion, +together with many other data sets. + +.. _GEO: https://www.ncbi.nlm.nih.gov/geo/ + + +Single-cell study look-up tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Svensson_ et al. published a single-cell database_ in the form of a table in which each row contains a description of a study which published single-cell RNA-seq data. +Some of these data sets are already included in sfaira, +consider also our interactive website_ for a graphical user interface to our complete data zoo. +Note that this website can be used as a look-up table but sfaira also allows you to directly load and interact with these data sets. + +.. 
_Svensson: https://academic.oup.com/database/article/doi/10.1093/database/baaa073/6008692 +.. _database: https://www.nxn.se/single-cell-studies/gui +.. _website: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/environment_brief.rst b/docs/environment_brief.rst new file mode 100644 index 000000000..6cca3acde --- /dev/null +++ b/docs/environment_brief.rst @@ -0,0 +1,26 @@ +.. role:: small +.. role:: smaller + +sfaira fits into an environment of many other projects centred on making data and models accessible. + +Data zoo +~~~~~~~~ + +We focus on providing a python interface to interact with locally stored data set collections +without requiring dedicated data reading and annotation harmonisation scripts: +These code blocks are absorbed into our data zoo backend and can be conveniently triggered with short commands. + + +Model zoo +~~~~~~~~~ + +A large body of recent research has been devoted to improving models that learn representations of cells captured with single-cell RNA-seq. +These models include embedding models such as autoencoders and cell type prediction models. +Many of these models are implemented in software packages and can be deployed on new data sets. +In many of these cases, it also makes sense to use pre-trained models to leverage previously published modelling results. +We provide a single interface to interact with such pre-trained models which abstracts model settings into an API +so that users can easily switch between different pre-trained models. +Importantly, model execution is performed locally so that data does not have to be uploaded to external servers +and model storage is decentral so that anybody can contribute models easily. +Users benefit from easy, streamlined access to models that can be used in analysis workflows, +developers benefit from being able to deploy models to a large community of users without having to set up a model zoo.
diff --git a/docs/genomes.rst b/docs/genomes.rst deleted file mode 100644 index d144348e5..000000000 --- a/docs/genomes.rst +++ /dev/null @@ -1,14 +0,0 @@ -Genomes -========== - -Introduction to sfaira genome assembly management -------------------------------------------------- - - -Contribute genome assemblies to sfaira --------------------------------------- - - -Use a model architecture on a new genome assembly --------------------------------------------------- - diff --git a/docs/index.rst b/docs/index.rst index 9c0c95392..c699c8c1d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,28 +1,46 @@ -.. You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +|Stars| |PyPI| |PyPIDownloads| +.. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow + :target: https://github.com/theislab/sfaira/stargazers +.. |PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI + :target: https://pypi.org/project/sfaira +.. |PyPIDownloads| image:: https://pepy.tech/badge/sfaira + :target: https://pepy.tech/project/sfaira +sfaira - data and model repository for single-cell data +======================================================= -Welcome to sfaira's documentation! -==================================== +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/concept.png + :width: 600px + :align: center + +sfaira_ is a model and a data repository in a single python package. +We provide an interactive overview of the current state of the zoos on sfaira-site_. + +.. _sfaira: https://sfaira.readthedocs.io +.. _sfaira-site: https://theislab.github.io/sfaira-site/index.html + +.. include:: environment_brief.rst + +News +---- + +.. include:: news.rst + +Latest additions +---------------- + +.. include:: release-latest.rst .. 
toctree:: - :maxdepth: 2 - :caption: Contents: + :maxdepth: 1 + :hidden: installation api/index tutorials - models data - ontologies - genomes - training - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + models + ecosystem + roadmap + release-notes diff --git a/docs/models.rst b/docs/models.rst index a8b3f44ed..89b6f9545 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -1,12 +1,24 @@ Models ====== -Introduction to sfaira model management ---------------------------------------- +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/model_zoo.png + :width: 600px + :align: center + +User interface +-------------- + +The user interface allows users to query model code and parameter estimates to run on local data. +It takes care of downloading model parameters from the relevant cloud storage, loading parameters into a model instance locally and performing the forward pass. +With the user interface, users only have to worry about which model they want to execute, but not how this is facilitated. + + +Model management +---------------- A sfaira model is a class that inherits from BasicModel which defines a tf.keras.models.Model in self.training_model. This training_model describes the full forward pass. Additionally, embedding models also have an attribute X, a -tf.keras.models.Model that desribes the partial forward pass into the embedding layer. +tf.keras.models.Model that describes the partial forward pass into the embedding layer. Such a model class, e.g. ModelX, is wrapped by an inheriting class ModelXVersioned, that handles properties of the model architecture. @@ -16,11 +28,19 @@ In particular, ModelXVersioned - has access to a map of a version ID to an architectural hyperparameter setting (Topologies), allowing this class to set depth, width, etc of the model directly based on the name of the yielded model.
- has access to the feature space of the model, including its gene names, which are defined by the model topology in Topologies - Contribute models ------------------ +~~~~~~~~~~~~~~~~~ Models can be contributed and used in two ways - Full model code in sfaira repo - - Sfaira compatible model code in external package (to come) + - sfaira compatible model code in external package (to come) + +Training +-------- + +Estimator classes +~~~~~~~~~~~~~~~~~ + +We define estimator classes that have model instances as an attribute, that orchestrate all major aspects of model +fitting, such as a data loading, data streaming and model evaluation. \ No newline at end of file diff --git a/docs/news.rst b/docs/news.rst new file mode 100644 index 000000000..632cf17ac --- /dev/null +++ b/docs/news.rst @@ -0,0 +1 @@ +No news yet, stay tuned! diff --git a/docs/ontologies.rst b/docs/ontologies.rst deleted file mode 100644 index 0091c9a45..000000000 --- a/docs/ontologies.rst +++ /dev/null @@ -1,48 +0,0 @@ -Ontologies -========== - -Introduction to sfaira ontology management ------------------------------------------- - -Sfaira maintains versioned cell type universes and ontologies by species and organ. -A cell type universe is a list of the unique, most fine-grained cell type definitions available. -These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, -an ontology ID. -Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality -does not allow to distinguish between T cell subtypes. -To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again -defined by a cell type identifier. -Such a hierarchy can be writted as directed acyclic graph which has the cell type universe as its leave nodes. -Intuitively, the cell type hierarchy graph depends on the cell type universe. 
-Accordingly, both are versioned together in sfaira: -Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an -incrementation in both of their versions. -These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the -file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available -even after updates. -This versioning without depreceation of the old objects allows sfaira to execute and train models that were designed -for older cell type universes and thus ensures reproducibility. - -Contribute cell types to ontologies ------------------------------------ - -To contibute new cell types or change existing cell type universe entries, the cell type universe version has to be -incremented and the new entry can simply be added to the list or modified in the list. -We do not increment the universe version if a change does not influence the identity of a leave node with respect to -the other types in the universe, ie if it simply changes the spelling of a cell type or if an onology ID is added to -a type that previously did not have one. - -Contribute hierarchies to ontologies ------------------------------------- - -To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set -of the leave nodes (cell type universe) of the corresponding universe version. - - -Using ontologies to train cell type classifiers ------------------------------------------------ - -Cell type classifiers can be trained on data sets with different coarsity of cell type annotation using aggregate -cross-entropy as a loss and aggregate accuracy as a metric. -The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms -that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. 
diff --git a/docs/release-latest.rst b/docs/release-latest.rst new file mode 100644 index 000000000..913476eb3 --- /dev/null +++ b/docs/release-latest.rst @@ -0,0 +1,6 @@ +.. role:: small +.. role:: smaller + +0.2.1 :small:`2020-09-7` +~~~~~~~~~~~~~~~~~~~~~~~~ +Initial release with online documentation. diff --git a/docs/release-notes.rst b/docs/release-notes.rst new file mode 100644 index 000000000..7f8561271 --- /dev/null +++ b/docs/release-notes.rst @@ -0,0 +1,11 @@ +Release Notes +============= + +.. role:: small +.. role:: smaller + + +Version 0.2 +----------- + +.. include:: release-latest.rst diff --git a/docs/roadmap.rst b/docs/roadmap.rst new file mode 100644 index 000000000..69b1a3ee2 --- /dev/null +++ b/docs/roadmap.rst @@ -0,0 +1,20 @@ +Roadmap +======= + +Cell ontologies +~~~~~~~~~~~~~~~ +We are currently migrating our ontology to use the Cell Ontology_ as a backbone. +For details, read through this milestone_. + +.. _Ontology: http://www.obofoundry.org/ontology/cl.html +.. _milestone: https://github.com/theislab/sfaira/milestone/1 + + +Interface online data repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are preparing to interface online data repositories which provide streamlined data. +This allows users to build local data set collections more easily because these providers usually have a clear download interface, +consider the cellxgene_ data portal for example. +We aim to represent both these data set portals and data sets that have not been streamlined in such a fashion to provide a comprehensive collection of as many data sets as possible. + +.. 
_cellxgene: https://cellxgene.cziscience.com/ diff --git a/docs/training.rst b/docs/training.rst deleted file mode 100644 index f15189d9a..000000000 --- a/docs/training.rst +++ /dev/null @@ -1,8 +0,0 @@ -Training -========= - -Introduction to sfaira estimator classes ----------------------------------------- - -We define estimator classes that have model instances as an attribute, that orchestrate all major aspects of model -fitting, such as a data loading, data streaming and model evaluation. diff --git a/docs/tutorials.rst b/docs/tutorials.rst index bb1654138..a644fc8bf 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -1,4 +1,17 @@ Tutorials ========= -To come shortly! +Queries to data zoo +------------------- + +We provide a tutorial for queries to the data zoo through our python API (dataloaders_) and for assembling meta data across the zoo (metadata_). + +.. _dataloaders: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/data_loaders.ipynb +.. _metadata: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/meta_data.ipynb + +Queries to model zoo +-------------------- + +We provide a tutorial for interacting with our model zoo through a python API in a scanpy workflow through our `user interface`_ + +.. 
_user interface: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/user_interface.ipynb diff --git a/requirements.txt b/requirements.txt index 8c0d197f1..2ecab3f17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ sphinx sphinx-autodoc-typehints sphinx_rtd_theme tqdm +requests diff --git a/resources/images/concept.jpeg b/resources/images/concept.jpeg deleted file mode 100644 index 77b1822e0..000000000 Binary files a/resources/images/concept.jpeg and /dev/null differ diff --git a/resources/images/concept.png b/resources/images/concept.png new file mode 100644 index 000000000..ed26f5ebe Binary files /dev/null and b/resources/images/concept.png differ diff --git a/resources/images/data_zoo.png b/resources/images/data_zoo.png new file mode 100644 index 000000000..b570e53ee Binary files /dev/null and b/resources/images/data_zoo.png differ diff --git a/resources/images/model_zoo.png b/resources/images/model_zoo.png new file mode 100644 index 000000000..5f8e8308a Binary files /dev/null and b/resources/images/model_zoo.png differ diff --git a/setup.py b/setup.py index 2fa199560..95e4de8ce 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,9 @@ 'extension': [ "sfaira_extension", ], + 'zenodo': [ + "requests" + ], 'docs': [ 'sphinx', 'sphinx-autodoc-typehints', diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 924940aaa..50910f0a2 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,11 +1,25 @@ +# -*- coding: utf-8 -*- +"""A Data and Model Zoo for Single-Cell Genomics.""" + from ._version import get_versions __version__ = get_versions()['version'] del get_versions - +__maintainer__ = ', '.join([ + "Leander Dony", + "David S. Fischer" +]) __author__ = ', '.join([ - 'theislab' + "Leander Dony", + "David S. 
Fischer" ]) __email__ = ', '.join([ - 'david.fischer@helmholtz-muenchen.de' -]) \ No newline at end of file + "leander.dony@helmholtz-muenchen.de", + "david.fischer@helmholtz-muenchen.de" +]) + +import sfaira.data +import sfaira.genomes +import sfaira.models +import sfaira.train +import sfaira.interface as ui diff --git a/sfaira/api/__init__.py b/sfaira/api/__init__.py index 2caf3b54c..e69de29bb 100644 --- a/sfaira/api/__init__.py +++ b/sfaira/api/__init__.py @@ -1,6 +0,0 @@ -from . import consts -from . import data -from . import genomes -from . import models -from . import train -from . import ui diff --git a/sfaira/api/data.py b/sfaira/api/data.py deleted file mode 100644 index 4ff0d90f7..000000000 --- a/sfaira/api/data.py +++ /dev/null @@ -1,3 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase, DatasetSuperGroup -from sfaira.data import mouse -from sfaira.data import human diff --git a/sfaira/api/genomes.py b/sfaira/api/genomes.py deleted file mode 100644 index 1d3783d36..000000000 --- a/sfaira/api/genomes.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.genomes import ExtractFeatureListEnsemble diff --git a/sfaira/api/models.py b/sfaira/api/models.py deleted file mode 100644 index 9565f986d..000000000 --- a/sfaira/api/models.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.models import celltype -from sfaira.models import embedding diff --git a/sfaira/api/train.py b/sfaira/api/train.py deleted file mode 100644 index 41b083cf9..000000000 --- a/sfaira/api/train.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.train import GridsearchContainer, SummarizeGridsearchEmbedding, SummarizeGridsearchCelltype -from sfaira.train import TrainModelEmbedding, TrainModelCelltype, TargetZoos diff --git a/sfaira/api/ui.py b/sfaira/api/ui.py deleted file mode 100644 index 135b63440..000000000 --- a/sfaira/api/ui.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.interface import UserInterface diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 209809e16..896eb5de2 
100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,3 +1,4 @@ from .base import DatasetBase, DatasetGroupBase, DatasetSuperGroup from . import mouse -from . import human \ No newline at end of file +from . import human +from .interactive import DatasetInteractive diff --git a/sfaira/data/base.py b/sfaira/data/base.py index cb7035a44..c6b55ac21 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -98,10 +98,11 @@ def load( self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, - celltype_version=celltype_version - ) + if ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, + celltype_version=celltype_version + ) # Remove version tag on ensembl gene ID so that different versions are superimposed downstream: if remove_gene_version: @@ -157,7 +158,7 @@ def load( elif isinstance(self.adata.X, scipy.sparse.spmatrix): x = self.adata.X.tocsc() else: - raise ValueError("data type %s not recognized" % type(self.adata.X)) + raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep data_ids = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values @@ -353,11 +354,11 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar ])) ) else: - raise ValueError("did not reccognize backed AnnData.X format %s" % type(adata_backed.X)) + raise ValueError(f"Did not reccognize backed AnnData.X format {type(adata_backed.X)}") def set_unkown_class_id(self, ids: list): """ - Sets list of custom identifiers of unkown cell types in adata.obs["cell_ontology_class"] to the target one. 
+ Sets list of custom identifiers of unknown cell types in adata.obs["cell_ontology_class"] to the target one. :param ids: IDs in adata.obs["cell_ontology_class"] to replace. :return: @@ -384,7 +385,7 @@ def _set_genome(self, genome=genome ) else: - raise ValueError("genomes %s not recognised. please provide valid genomes." % genome) + raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") self.genome_container = g @@ -908,7 +909,7 @@ def get_gc( genome=genome ) else: - raise ValueError("genomes %s not recognised. please provide valid genomes." % genome) + raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") return g def ncells(self, annotated_only: bool = False): @@ -1016,7 +1017,10 @@ def load_all_tobacked( self.adata.filename = fn_backed # setting this attribute switches this anndata to a backed object # Note that setting .filename automatically redefines .X as dense, so we have to redefine it as sparse: if not as_dense: - self.adata.X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse + X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse + X.indices = X.indices.astype(np.int64) + X.indptr = X.indptr.astype(np.int64) + self.adata.X = X keys = [ ADATA_IDS_SFAIRA.author, ADATA_IDS_SFAIRA.year, diff --git a/sfaira/data/human/adipose/human_adipose.py b/sfaira/data/human/adipose/human_adipose.py index b47b77d89..9994507ab 100644 --- a/sfaira/data/human/adipose/human_adipose.py +++ b/sfaira/data/human/adipose/human_adipose.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupAdipose().datasets) + from sfaira_extension.data.human import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose().datasets) except ImportError: pass 
diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 35ee5f198..cc711c8b0 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdipose' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adipose/hcl_AdultAdipose_1.h5ad") + fn = os.path.join(self.path, "human", "adipose", "hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland.py b/sfaira/data/human/adrenalgland/human_adrenalgland.py index 8fad6089d..234a563a9 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland.py @@ -30,7 +30,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupAdrenalgland().datasets) + from sfaira_extension.data.human import DatasetGroupAdrenalgland + self.datasets.update(DatasetGroupAdrenalgland().datasets) except ImportError: pass diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index f7ff1aea5..06db980fe 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 
'NeonatalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_NeonatalAdrenalGland_1.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 008f50462..fcd778b07 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_2.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index e4b975df6..0e110e5a0 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' 
self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_3.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 4a8e189ce..7772eb398 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdrenalGland' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_3.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 17c79d022..2314f8b43 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_4.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 4a0eb5e51..37069e26c 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdrenalGland' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_2.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/artery/human_artery.py b/sfaira/data/human/artery/human_artery.py index 93af2dd06..4575d1b6a 100644 --- a/sfaira/data/human/artery/human_artery.py +++ b/sfaira/data/human/artery/human_artery.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupArtery().datasets) + from sfaira_extension.data.human import DatasetGroupArtery + 
self.datasets.update(DatasetGroupArtery().datasets) except ImportError: pass diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 9b05fe785..273df1b58 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultArtery' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/artery/hcl_AdultArtery_1.h5ad") + fn = os.path.join(self.path, "human", "artery", "hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder.py b/sfaira/data/human/bladder/human_bladder.py index e9bf573ff..d31620a7f 100644 --- a/sfaira/data/human/bladder/human_bladder.py +++ b/sfaira/data/human/bladder/human_bladder.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBladder().datasets) + from sfaira_extension.data.human import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder().datasets) except ImportError: pass diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index b8969b60f..de82b63e6 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' 
self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_1.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 281512488..5d4c7e400 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_2.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 81fc62825..945a5205b 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, 
fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bladder/hcl_AdultGallbladder_2.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood.py b/sfaira/data/human/blood/human_blood.py index d0b16e5ca..e51216bce 100644 --- a/sfaira/data/human/blood/human_blood.py +++ b/sfaira/data/human/blood/human_blood.py @@ -36,7 +36,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBlood().datasets) + from sfaira_extension.data.human import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood().datasets) except ImportError: pass diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index b27a2bb0d..8e4749b9d 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -37,6 +37,7 @@ def __init__( self.species = "human" self.id = "human_blood_2018_10x_ica_001_unknown" self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_website_meta = None self.organ = "blood" self.sub_tissue = "umbilical_cord_blood" self.has_celltypes = False @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, 
(self.adata.obs['emptydrops_is_cell'] == 't').values) @@ -59,7 +60,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/blood/ica_blood.h5ad") + fn = os.path.join(self.path, "human", "blood", "ica_blood.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 4b0da8c61..cc70f0b67 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -29,6 +29,7 @@ def __init__( self.species = "human" self.id = "human_blood_2019_10x_10xGenomics_001_unknown" self.download_website = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.download_website_meta = None self.organ = "blood" self.sub_tissue = "pbmcs" self.has_celltypes = False @@ -43,7 +44,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = '10x Genomics' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 9395c471d..8c2f78fe2 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or 
not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_3.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index 3ea9979f5..ae11c35a7 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_PeripheralBlood_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 333cf7daa..7ab28023a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBlood' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBlood_2.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index fd54c0efa..9fb895ef0 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_4.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 66faeca5c..770fb5c5c 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBloodCD34P' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index 
3f60960fb..36a2d5662 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBloodCD34P' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_2.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index e54565913..bbf76f52a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBlood' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBlood_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bone/human_bone.py b/sfaira/data/human/bone/human_bone.py index 68e5f868f..4cc46ad77 100644 --- a/sfaira/data/human/bone/human_bone.py +++ b/sfaira/data/human/bone/human_bone.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - 
self.datasets.update(sfairae.data.human.DatasetGroupBone().datasets) + from sfaira_extension.data.human import DatasetGroupBone + self.datasets.update(DatasetGroupBone().datasets) except ImportError: pass diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index dc5e2c45e..68b08699f 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -37,6 +37,7 @@ def __init__( self.species = "human" self.id = "human_bone_2018_10x_ica_unknown" self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_website_meta = None self.organ = "bone" self.sub_tissue = "bone_marrow" self.has_celltypes = False @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bone/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'bone marrow').values, (self.adata.obs['emptydrops_is_cell'] == 't').values) @@ -59,7 +60,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/bone/ica_bone.h5ad") + fn = os.path.join(self.path, "human", "bone", "ica_bone.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index e2bfc7cfe..027972939 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBoneMarrow' self.dev_stage = 'Adult' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_1.h5ad") + fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index fa2ee77d1..00d11f26f 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBoneMarrow' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_2.h5ad") + fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain.py b/sfaira/data/human/brain/human_brain.py index 4d15fdf2f..3f2eedda9 100644 --- a/sfaira/data/human/brain/human_brain.py +++ b/sfaira/data/human/brain/human_brain.py @@ -32,7 +32,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBrain().datasets) + from sfaira_extension.data.human import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain().datasets) except ImportError: pass diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py 
b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index b2fc47640..2ae25a98c 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" self.download_website = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" + self.download_website_meta = None self.organ = "brain" self.sub_tissue = "hippocampus, prefrontal cortex" self.has_celltypes = True @@ -57,7 +58,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/habib17.processed.h5ad") + fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index 2fa168dfc..b257f59af 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_4.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 430ad3f0d..31a52e85d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_5.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 8c9d115f5..9ab01e020 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_3.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index c35ac3d57..5dcb7bd39 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -29,6 
+29,7 @@ def __init__( self.sub_tissue = 'AdultTemporalLobe' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_AdultTemporalLobe_1.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index d31e0665d..24b5636fd 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_6.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index fc030ab12..a812123a9 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultCerebellum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_AdultCerebellum_1.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/calvaria/human_calvaria.py b/sfaira/data/human/calvaria/human_calvaria.py index 431b6ba10..3a101f22e 100644 --- a/sfaira/data/human/calvaria/human_calvaria.py +++ b/sfaira/data/human/calvaria/human_calvaria.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupCalvaria().datasets) + from sfaira_extension.data.human import DatasetGroupCalvaria + self.datasets.update(DatasetGroupCalvaria().datasets) except ImportError: pass diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 62a347869..d370f75b5 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalCalvaria' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/calvaria/hcl_FetalCalvaria_1.h5ad") + fn = os.path.join(self.path, "human", "calvaria", "hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/cervix/human_cervix.py b/sfaira/data/human/cervix/human_cervix.py index 
14116d928..eb65f1a1f 100644 --- a/sfaira/data/human/cervix/human_cervix.py +++ b/sfaira/data/human/cervix/human_cervix.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupCervix().datasets) + from sfaira_extension.data.human import DatasetGroupCervix + self.datasets.update(DatasetGroupCervix().datasets) except ImportError: pass diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index c3cad3af5..65e54254f 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultCervix' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/cervix/hcl_AdultCervix_1.h5ad") + fn = os.path.join(self.path, "human", "cervix", "hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py index 8ce4d6c0f..5fcebd370 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupChorionicvillus().datasets) + from sfaira_extension.data.human import 
DatasetGroupChorionicvillus + self.datasets.update(DatasetGroupChorionicvillus().datasets) except ImportError: pass diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index c4fe7b110..cdb2c119a 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'ChorionicVillus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/chorionicvillus/hcl_ChorionicVillus_1.h5ad") + fn = os.path.join(self.path, "human", "chorionicvillus", "hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon.py b/sfaira/data/human/colon/human_colon.py index 8dc44ab6f..d86d094f4 100644 --- a/sfaira/data/human/colon/human_colon.py +++ b/sfaira/data/human/colon/human_colon.py @@ -34,7 +34,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupColon().datasets) + from sfaira_extension.data.human import DatasetGroupColon + self.datasets.update(DatasetGroupColon().datasets) except ImportError: pass diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index e5406c54a..8d3f7d7ea 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py 
@@ -64,6 +64,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" + self.download_website_meta = 'private' self.organ = "colon" self.sub_tissue = "lamina propria of mucosa of colon" self.has_celltypes = True @@ -94,9 +95,9 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/colon/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.path, "human/colon/uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.path, "human/colon/hc_meta_data_stromal_with_donor.txt") + os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") ] adata = anndata.read_loom(fn[0]) ctuc = pd.read_csv(fn[1], sep='\t') @@ -125,7 +126,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/colon/kinchenetal.h5ad") + fn = os.path.join(self.path, "human", "colon", "kinchenetal.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Simmons' diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 3c1f2a1d5..279e8a85b 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -28,6 +28,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" self.download_website = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colonic epithelium" self.has_celltypes 
= True @@ -81,7 +82,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/smillie19_epi.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index 086bd76f1..e50d96f1f 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" + self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colon" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/wang20_colon.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index e40f0105f..641a95723 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -28,6 +28,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" self.download_website = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" + self.download_website_meta = 
None self.organ = "colon" self.sub_tissue = "colonic immune cells" self.has_celltypes = True @@ -68,7 +69,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/james20.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index cc0c3e9bb..72aa77936 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultAscendingColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultAscendingColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index d0e1eaead..11de7f12f 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultTransverseColon' self.dev_stage = 'Adult' - self.download_website = 
"https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 6d5225af4..d26e5a99a 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultTransverseColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index cab2ed356..a94dd02d8 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultSigmoidColon' self.dev_stage = 'Adult' - self.download_website = 
"https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/duodenum/human_duodenum.py b/sfaira/data/human/duodenum/human_duodenum.py index fcb3b5ccf..367138896 100644 --- a/sfaira/data/human/duodenum/human_duodenum.py +++ b/sfaira/data/human/duodenum/human_duodenum.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupDuodenum().datasets) + from sfaira_extension.data.human import DatasetGroupDuodenum + self.datasets.update(DatasetGroupDuodenum().datasets) except ImportError: pass diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 31b38f35e..d3005d603 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultDuodenum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/duodenum/hcl_AdultDuodenum_1.h5ad") + fn = os.path.join(self.path, "human", "duodenum", "hcl_AdultDuodenum_1.h5ad") 
self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/epityphlon/human_epityphlon.py b/sfaira/data/human/epityphlon/human_epityphlon.py index dc49920c8..bbf2297e9 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon.py +++ b/sfaira/data/human/epityphlon/human_epityphlon.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupEpityphlon().datasets) + from sfaira_extension.data.human import DatasetGroupEpityphlon + self.datasets.update(DatasetGroupEpityphlon().datasets) except ImportError: pass diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index 0612889dd..ec43b2e98 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultEpityphlon' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/epityphlon/hcl_AdultEpityphlon_1.h5ad") + fn = os.path.join(self.path, "human", "epityphlon", "hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/esophagus/human_esophagus.py b/sfaira/data/human/esophagus/human_esophagus.py index 7fe9c574f..b1df21e7a 100644 --- a/sfaira/data/human/esophagus/human_esophagus.py +++ b/sfaira/data/human/esophagus/human_esophagus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load 
versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupEsophagus().datasets) + from sfaira_extension.data.human import DatasetGroupEsophagus + self.datasets.update(DatasetGroupEsophagus().datasets) except ImportError: pass diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 563b423e1..22376fc4f 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_esophagus_2019_10x_madissoon_001_10.1101/741405" self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.download_website_meta = None self.organ = "esophagus" self.sub_tissue = "esophagus" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/oesophagus.cellxgene.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") self.adata = anndata.read(fn) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 0e1a6b012..ad524c341 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Esophagus' self.sub_tissue = 'AdultEsophagus' self.dev_stage = 'Adult' - self.download_website = 
"https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -66,7 +67,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_1.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 6d2242d04..efdb0d499 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Esophagus' self.sub_tissue = 'AdultEsophagus' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -66,7 +67,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_2.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/eye/human_eye.py b/sfaira/data/human/eye/human_eye.py index 42876bd4f..227bda330 100644 --- a/sfaira/data/human/eye/human_eye.py +++ b/sfaira/data/human/eye/human_eye.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - 
self.datasets.update(sfairae.data.human.DatasetGroupEye().datasets) + from sfaira_extension.data.human import DatasetGroupEye + self.datasets.update(DatasetGroupEye().datasets) except ImportError: pass diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index e01cee387..2c74fd6b1 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" self.download_website = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/lukowski19.processed.h5ad") + fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index f9c5d497f..621447908 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" self.download_website = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/menon19.processed.h5ad") + fn = 
os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Hafler' diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 0c0cb96c9..7aa0c2591 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" self.download_website = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -52,7 +53,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/voigt19.processed.h5ad") + fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index bb71f0c01..041fc2c4c 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Eye' self.sub_tissue = 'FetalEyes' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -65,7 +66,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/hcl_FetalEyes_1.h5ad") + fn = os.path.join(self.path, "human", "eye", "hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff 
--git a/sfaira/data/human/fallopiantube/human_fallopiantube.py b/sfaira/data/human/fallopiantube/human_fallopiantube.py index e8718df1e..739e221a0 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupFallopiantube().datasets) + from sfaira_extension.data.human import DatasetGroupFallopiantube + self.datasets.update(DatasetGroupFallopiantube().datasets) except ImportError: pass diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 6961b28c9..c78a1a42c 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultFallopiantube' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/fallopiantube/hcl_AdultFallopiantube_1.h5ad") + fn = os.path.join(self.path, "human", "fallopiantube", "hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/femalegonad/human_femalegonad.py b/sfaira/data/human/femalegonad/human_femalegonad.py index 14897f050..a4b4745fb 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad.py +++ b/sfaira/data/human/femalegonad/human_femalegonad.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # 
Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupFemalegonad().datasets) + from sfaira_extension.data.human import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 051be7563..bafe66bdf 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalFemaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_2.h5ad") + fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 98bb43dc8..e5d3f22d3 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalFemaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = 
os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_1.h5ad") + fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/gallbladder/human_gallbladder.py b/sfaira/data/human/gallbladder/human_gallbladder.py index 4fd59206e..8d5e660f5 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder.py +++ b/sfaira/data/human/gallbladder/human_gallbladder.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupGallbladder().datasets) + from sfaira_extension.data.human import DatasetGroupGallbladder + self.datasets.update(DatasetGroupGallbladder().datasets) except ImportError: pass diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 75c1faa80..40a6406d9 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultGallbladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/gallbladder/hcl_AdultGallbladder_1.h5ad") + fn = os.path.join(self.path, "human", "gallbladder", "hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart.py b/sfaira/data/human/heart/human_heart.py index aced6ac11..5aff6fdce 100644 --- 
a/sfaira/data/human/heart/human_heart.py +++ b/sfaira/data/human/heart/human_heart.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupHeart().datasets) + from sfaira_extension.data.human import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart().datasets) except ImportError: pass diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index 9e7ef8589..c4ee874db 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalHeart' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_2.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 6664c0bd7..2da149e62 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultHeart' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = 
os.path.join(self.path, "human/heart/hcl_AdultHeart_2.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 8c5058ac2..430f8c92e 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultHeart' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_1.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 76d54f986..2b73cebe0 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalHeart' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_1.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git 
a/sfaira/data/human/hesc/human_hesc.py b/sfaira/data/human/hesc/human_hesc.py index 3855c509d..3292ff6f3 100644 --- a/sfaira/data/human/hesc/human_hesc.py +++ b/sfaira/data/human/hesc/human_hesc.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupHesc().datasets) + from sfaira_extension.data.human import DatasetGroupHesc + self.datasets.update(DatasetGroupHesc().datasets) except ImportError: pass diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index d7ee8d9c7..70a0816aa 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'HESC' self.dev_stage = 'HESC' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/hesc/hcl_HESC_1.h5ad") + fn = os.path.join(self.path, "human", "hesc", "hcl_HESC_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/ileum/human_ileum.py b/sfaira/data/human/ileum/human_ileum.py index b5ade8f37..636d508f2 100644 --- a/sfaira/data/human/ileum/human_ileum.py +++ b/sfaira/data/human/ileum/human_ileum.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupIleum().datasets) + from sfaira_extension.data.human import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum().datasets) except 
ImportError: pass diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 6378752c5..9b4e2d89e 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" self.download_website = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" + self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" self.has_celltypes = True @@ -62,7 +63,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/martin19.processed.h5ad") + fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 792ad33b2..255803190 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" + self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/wang20_ileum.processed.h5ad") + fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 87a9ab8f9..5726c3f77 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultIleum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -68,7 +69,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/hcl_AdultIleum_2.h5ad") + fn = os.path.join(self.path, "human", "ileum", "hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/jejunum/human_jejunum.py b/sfaira/data/human/jejunum/human_jejunum.py index 0fa39a272..3839f321b 100644 --- a/sfaira/data/human/jejunum/human_jejunum.py +++ b/sfaira/data/human/jejunum/human_jejunum.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupJejunum().datasets) + from sfaira_extension.data.human import DatasetGroupJejunum + self.datasets.update(DatasetGroupJejunum().datasets) except ImportError: pass diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 6085adc05..e9d7712b4 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultJejunum' self.dev_stage = 'Adult' 
self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/jejunum/hcl_AdultJejunum_2.h5ad") + fn = os.path.join(self.path, "human", "jejunum", "hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney.py b/sfaira/data/human/kidney/human_kidney.py index 9e175cd6f..6cc159f97 100644 --- a/sfaira/data/human/kidney/human_kidney.py +++ b/sfaira/data/human/kidney/human_kidney.py @@ -38,7 +38,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupKidney().datasets) + from sfaira_extension.data.human import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney().datasets) except ImportError: pass diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index f4d630a4a..867feb38b 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/kidney/GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.path, 'human/kidney/GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz') + os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), + os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], 
sep='\t').T) annot = pd.read_csv(fn[1], index_col=0, dtype='category') diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index d37963426..8d87c833f 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -29,6 +29,7 @@ def __init__( 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad', 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad' ] + self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "renal medulla, renal pelvis, ureter, cortex of kidney" self.has_celltypes = True @@ -115,8 +116,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/kidney/Mature_Full_v2.1.h5ad"), - os.path.join(self.path, 'human/kidney/Fetal_full.h5ad') + os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), + os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") ] adult = anndata.read(fn[0]) fetal = anndata.read(fn[1]) diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index bd4590027..ee78c0032 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -56,6 +56,7 @@ def __init__( self.species = "human" self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" + self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "kidney" self.has_celltypes = False @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/GSE131685_RAW.tar") + fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") 
adatas = [] with tarfile.open(fn) as tar: for member in tar.getmembers(): @@ -93,7 +94,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/kidney/GSE131685.h5ad") + fn = os.path.join(self.path, "human", "kidney", "GSE131685.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 791af3883..68203f9b9 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_2.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index f623c4c1c..ca7c14f2c 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_3.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 7f3e36f62..b354034d0 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_4.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index f66bc7db9..7d404820e 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps 
= { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_3.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 68c304254..1b9b34ef1 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_4.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 83c035039..2f1a0c45f 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 
+92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_5.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 8fce0aa76..36a28c728 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_6.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver.py b/sfaira/data/human/liver/human_liver.py index b88736cf1..bfb0e8dbb 100644 --- a/sfaira/data/human/liver/human_liver.py +++ b/sfaira/data/human/liver/human_liver.py @@ -36,7 +36,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupLiver().datasets) + from sfaira_extension.data.human import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver().datasets) except ImportError: pass diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py 
b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 2e8cd9ba7..c71297051 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE115469" + self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "caudate lobe" self.has_celltypes = True @@ -63,8 +64,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/liver/GSE115469.csv.gz"), - os.path.join(self.path, 'human/liver/GSE115469_labels.txt') + os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"), + os.path.join(self.path, "human", "liver", "GSE115469_labels.txt") ] self.adata = anndata.read_csv(fn[0]).T celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 0e2e55aad..4560abe11 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" self.download_website = "https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-7407/" + self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "liver" self.has_celltypes = True @@ -67,7 +68,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/fetal_liver_alladata_.h5ad") + fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 
'Haniffa' diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index 1c4c192ba..a2584a38f 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -42,6 +42,7 @@ def __init__( self.species = "human" self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" self.download_website = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" + self.download_website_meta = None self.organ = "liver" self.sub_tissue = "liver" self.has_celltypes = True @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/ramachandran.h5ad") + fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Henderson' diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 656dde868..1cc865cb2 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -82,8 +82,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/liver/GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.path, 'human/liver/GSE124395_clusterpartition.txt.gz') + os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) celltype_df = pd.read_csv(fn[1], sep=' ') diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 27dc3c2ed..404b75a83 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_1.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 9146e4339..277efb10f 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_2.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index a48826384..16b465ea4 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_4.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index c749ebcc7..ed870da4b 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'FetalLiver' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_Liver_1.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_Liver_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 8a4ecf706..48157a9f8 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'FetalLiver' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_Liver_2.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_Liver_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung.py b/sfaira/data/human/lung/human_lung.py index 274cfaeff..9297fd7e9 100644 --- a/sfaira/data/human/lung/human_lung.py +++ b/sfaira/data/human/lung/human_lung.py @@ -47,7 +47,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupLung().datasets) + from sfaira_extension.data.human import DatasetGroupLung + self.datasets.update(DatasetGroupLung().datasets) except ImportError: pass diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 81933a02f..e37dce7a1 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" self.download_website = "https://covid19.cog.sanger.ac.uk/" \ "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" + 
self.download_website_meta = None self.organ = "lung" self.sub_tissue = "alveoli, parenchyma" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index b24891994..35517ce68 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" self.download_website = "https://covid19.cog.sanger.ac.uk/" \ "vieira19_Bronchi_anonymised.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "bronchi" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/vieira19_Bronchi_anonymised.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 62c0dd849..ed83f3c95 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" self.download_website = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + 
self.download_website_meta = None self.organ = "lung" self.sub_tissue = "parenchyma" self.has_celltypes = True @@ -65,7 +66,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/madissoon19_lung.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Meyer' diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index 280ff991a..b4928a0e2 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -56,8 +56,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/lung/GSE130148_raw_counts.csv.gz"), - os.path.join(self.path, "human/lung/GSE130148_barcodes_cell_types.txt.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), ] self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 22326b4f6..075f59ce8 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -91,10 +91,10 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/lung/GSE135893_matrix.mtx.gz"), - os.path.join(self.path, "human/lung/GSE135893_genes.tsv.gz"), - os.path.join(self.path, "human/lung/GSE135893_barcodes.tsv.gz"), - os.path.join(self.path, "human/lung/GSE135893_IPF_metadata.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), + 
os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), ] self.adata = anndata.read_mtx(fn[0]).T self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=['ids']) @@ -104,7 +104,7 @@ def _load(self, fn=None): self.adata.obs = obs else: if fn is None: - fn = os.path.join(self.path, "human/lung/habermann_processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "habermann_processed.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Kropski' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index a72e5caba..119833343 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "lung" self.has_celltypes = True @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/lukassen20_lung_orig.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index 3bae58de9..24b52c8ca 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ 
b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "bronchial epithelial cells" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/lukassen20_airway_orig.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index a01e2cc95..a868b5420 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" self.download_website = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "fetal lung" self.has_celltypes = True @@ -67,7 +68,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/miller20.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "miller20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py 
b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index d1a3c8551..f2cf531e2 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -33,8 +33,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "human" - self.id = "human_lung_2020_10x_travaglini_001_10.1101/742320" + self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" self.has_celltypes = True @@ -107,7 +108,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") self.adata = anndata.read(fn) self.adata.X = scipy.sparse.csc_matrix(self.adata.X) self.adata.X = np.expm1(self.adata.X) @@ -116,7 +117,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 627199ddf..ab1e02530 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalLung' self.dev_stage = 'Fetus' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_FetalLung_1.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 5efd59b81..f52f73078 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_AdultLung_3.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index cfcf99f45..50fd78529 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, 
"human/lung/hcl_AdultLung_2.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 96d16219d..1f3715724 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_AdultLung_1.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index b65ea2a8c..223d3a409 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalLung' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_FetalLung_2.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py 
b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 8147dc611..77f4c67e0 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -33,8 +33,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "human" - self.id = "human_lung_2020_smartseq2_travaglini_002_10.1101/742320" + self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" self.has_celltypes = True @@ -94,7 +95,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") self.adata = anndata.read(fn) self.adata.X = scipy.sparse.csc_matrix(self.adata.X) self.adata.X = np.expm1(self.adata.X) @@ -103,7 +104,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'smartseq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue diff --git a/sfaira/data/human/malegonad/human_malegonad.py b/sfaira/data/human/malegonad/human_malegonad.py index 508259f2a..ea7e995df 100644 --- a/sfaira/data/human/malegonad/human_malegonad.py +++ b/sfaira/data/human/malegonad/human_malegonad.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as 
sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMalegonad().datasets) + from sfaira_extension.data.human import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 0ebc7036e..03362c90a 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2" self.download_website = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" + self.download_website_meta = None self.organ = "malegonad" self.sub_tissue = "testis" self.has_celltypes = True @@ -53,7 +54,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/malegonad/guo18_donor.processed.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index 372074230..f530fbc85 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: 
- fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_1.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 74b0d6eb1..ca8a98733 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_2.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/mixed/human_mixed.py b/sfaira/data/human/mixed/human_mixed.py index d46f9e03d..cbce1da35 100644 --- a/sfaira/data/human/mixed/human_mixed.py +++ b/sfaira/data/human/mixed/human_mixed.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMixed().datasets) + from sfaira_extension.data.human import DatasetGroupMixed + self.datasets.update(DatasetGroupMixed().datasets) except ImportError: pass diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 2559948d0..924c3a978 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ 
b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -82,6 +82,7 @@ def __init__( self.species = "human" self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3" self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" + self.download_website_meta = 'private' self.organ = "mixed" self.sub_tissue = "Bone Marrow, Lung, Lymph Node" self.has_celltypes = True @@ -98,9 +99,9 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/mixed/GSE126030_RAW.tar"), - os.path.join(self.path, "human/mixed/donor1.annotation.txt"), - os.path.join(self.path, "human/mixed/donor2.annotation.txt"), + os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), + os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), + os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"), ] adatas = [] with tarfile.open(fn[0]) as tar: @@ -147,7 +148,7 @@ def _load(self, fn=None): self.adata.X = scipy.sparse.csc_matrix(self.adata.X) else: if fn is None: - fn = os.path.join(self.path, "human/mixed/GSE126030.h5ad") + fn = os.path.join(self.path, "human", "mixed", "GSE126030.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sims" diff --git a/sfaira/data/human/muscle/human_muscle.py b/sfaira/data/human/muscle/human_muscle.py index a96850a70..3ccea8560 100644 --- a/sfaira/data/human/muscle/human_muscle.py +++ b/sfaira/data/human/muscle/human_muscle.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMuscle().datasets) + from sfaira_extension.data.human import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle().datasets) except ImportError: pass diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index c24351538..e4defc46f 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMuscle' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/muscle/hcl_FetalMuscle_1.h5ad") + fn = os.path.join(self.path, "human", "muscle", "hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 5f8f4c2d5..c92719054 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultMuscle' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/muscle/hcl_AdultMuscle_1.h5ad") + fn = os.path.join(self.path, "human", "muscle", "hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum.py b/sfaira/data/human/omentum/human_omentum.py index a1d1879b7..56f49739e 100644 --- a/sfaira/data/human/omentum/human_omentum.py +++ b/sfaira/data/human/omentum/human_omentum.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, 
datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupOmentum().datasets) + from sfaira_extension.data.human import DatasetGroupOmentum + self.datasets.update(DatasetGroupOmentum().datasets) except ImportError: pass diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 3275d67ea..2550ea8ed 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_2.h5ad") + fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index aeaf8842f..88614e843 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_3.h5ad") + fn = os.path.join(self.path, 
"human", "omentum", "hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index d2ff1416e..e9fe7fff4 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_1.h5ad") + fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas.py b/sfaira/data/human/pancreas/human_pancreas.py index 9252cbf37..84bc010fd 100644 --- a/sfaira/data/human/pancreas/human_pancreas.py +++ b/sfaira/data/human/pancreas/human_pancreas.py @@ -32,7 +32,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPancreas().datasets) + from sfaira_extension.data.human import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas().datasets) except ImportError: pass diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index b8f672703..f7b378c33 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -27,6 +27,7 @@ 
def __init__( self.species = "human" self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" self.download_website = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" + self.download_website_meta = None self.organ = "pancreas" self.sub_tissue = "pancreas" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/baron16.processed.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index 33d382dd0..f60cea9a0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/pancreas/E-MTAB-5061.processed.1.zip"), - os.path.join(self.path, "human/pancreas/E-MTAB-5061.sdrf.txt") + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") ] df = pd.read_csv(fn[0], sep='\t') df.index = df.index.get_level_values(0) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index d9cfa8d4b..a65b155fd 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -93,8 +93,8 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - 
os.path.join(self.path, "human/pancreas/GSE81547_RAW.tar"), - os.path.join(self.path, "human/pancreas/GSE81547_series_matrix.txt.gz") + os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), + os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") ] dfs = [] with tarfile.open(fn[0]) as tar: @@ -126,7 +126,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/pancreas/GSE81547.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "GSE81547.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 63dfb2c74..7d79c6021 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'AdultPancreas' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_AdultPancreas_1.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 009adbdfa..126a23ce5 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( 
self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_1.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 3932c1255..70e7187bb 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_2.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 2abd51705..6f3ef7db0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_3.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/placenta/human_placenta.py b/sfaira/data/human/placenta/human_placenta.py index 5d8df2fed..03a23584f 100644 --- a/sfaira/data/human/placenta/human_placenta.py +++ b/sfaira/data/human/placenta/human_placenta.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPlacenta().datasets) + from sfaira_extension.data.human import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta().datasets) except ImportError: pass diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 72c453c24..22f6d9e57 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -75,8 +75,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/placenta/E-MTAB-6701.processed.1.zip"), - os.path.join(self.path, "human/placenta/E-MTAB-6701.processed.2.zip"), + os.path.join(self.path, 
"human", "placenta", "E-MTAB-6701.processed.1.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) df = pd.read_csv(fn[1], sep='\t') diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 22d461ad2..b09596a10 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -75,8 +75,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/placenta/E-MTAB-6678.processed.1.zip"), - os.path.join(self.path, "human/placenta/E-MTAB-6678.processed.2.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) df = pd.read_csv(fn[1], sep='\t') diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index fde6c3d50..607feae07 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Placenta' self.sub_tissue = 'Placenta' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/placenta/hcl_Placenta_1.h5ad") + fn = 
os.path.join(self.path, "human", "placenta", "hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pleura/human_pleura.py b/sfaira/data/human/pleura/human_pleura.py index 853035a55..db3707fda 100644 --- a/sfaira/data/human/pleura/human_pleura.py +++ b/sfaira/data/human/pleura/human_pleura.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPleura().datasets) + from sfaira_extension.data.human import DatasetGroupPleura + self.datasets.update(DatasetGroupPleura().datasets) except ImportError: pass diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index b50155990..39412e70b 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPleura' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pleura/hcl_AdultPleura_1.h5ad") + fn = os.path.join(self.path, "human", "pleura", "hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/prostate/human_prostate.py b/sfaira/data/human/prostate/human_prostate.py index a6d15c2e2..cf3a5485b 100644 --- a/sfaira/data/human/prostate/human_prostate.py +++ b/sfaira/data/human/prostate/human_prostate.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: 
try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupProstate().datasets) + from sfaira_extension.data.human import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate().datasets) except ImportError: pass diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 82a775f96..fb8c5907b 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" self.download_website = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" + self.download_website_meta = None self.organ = "prostate" self.sub_tissue = "prostate" self.has_celltypes = True @@ -50,7 +51,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/prostate/henry18_0.processed.h5ad") + fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 322236187..7e1c44559 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultProstate' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -62,7 +63,7 @@ def _load(self, fn=None): if self._load_raw or not 
self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/prostate/hcl_AdultProstate_1.h5ad") + fn = os.path.join(self.path, "human", "prostate", "hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rectum/human_rectum.py b/sfaira/data/human/rectum/human_rectum.py index 236ccca09..be4385732 100644 --- a/sfaira/data/human/rectum/human_rectum.py +++ b/sfaira/data/human/rectum/human_rectum.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupRectum().datasets) + from sfaira_extension.data.human import DatasetGroupRectum + self.datasets.update(DatasetGroupRectum().datasets) except ImportError: pass diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index f92d2981a..9bbb7957d 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -49,7 +49,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rectum/wang20_rectum.processed.h5ad") + fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index e8f80e4d9..c1a2a0009 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultRectum' self.dev_stage = 
'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -57,7 +58,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rectum/hcl_AdultRectum_1.h5ad") + fn = os.path.join(self.path, "human", "rectum", "hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rib/human_rib.py b/sfaira/data/human/rib/human_rib.py index 0b7e9a07a..60f2c6df8 100644 --- a/sfaira/data/human/rib/human_rib.py +++ b/sfaira/data/human/rib/human_rib.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupRib().datasets) + from sfaira_extension.data.human import DatasetGroupRib + self.datasets.update(DatasetGroupRib().datasets) except ImportError: pass diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 4de6aa18b..e5e1a4e65 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalRib' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rib/hcl_FetalRib_2.h5ad") + fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py 
b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index 5eac488d3..421ad2efc 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalRib' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rib/hcl_FetalRib_3.h5ad") + fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/skin/human_skin.py b/sfaira/data/human/skin/human_skin.py index b13b7c39c..16887b9dd 100644 --- a/sfaira/data/human/skin/human_skin.py +++ b/sfaira/data/human/skin/human_skin.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSkin().datasets) + from sfaira_extension.data.human import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin().datasets) except ImportError: pass diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index af794cb5c..aba00a706 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSkin' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -71,7 +72,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn 
is None: - fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_2.h5ad") + fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 0bcf79a5e..5b1772b9f 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSkin' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -71,7 +72,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_3.h5ad") + fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spinalcord/human_spinalcord.py b/sfaira/data/human/spinalcord/human_spinalcord.py index 246497a33..b56b23b34 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord.py +++ b/sfaira/data/human/spinalcord/human_spinalcord.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSpinalcord().datasets) + from sfaira_extension.data.human import DatasetGroupSpinalcord + self.datasets.update(DatasetGroupSpinalcord().datasets) except ImportError: pass diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index e9a0e0561..3a276955e 100644 --- 
a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSpinalCord' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spinalcord/hcl_FetalSpinalCord_1.h5ad") + fn = os.path.join(self.path, "human", "spinalcord", "hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spleen/human_spleen.py b/sfaira/data/human/spleen/human_spleen.py index 783e2922c..0ca8ab386 100644 --- a/sfaira/data/human/spleen/human_spleen.py +++ b/sfaira/data/human/spleen/human_spleen.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSpleen().datasets) + from sfaira_extension.data.human import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen().datasets) except ImportError: pass diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 631710bb9..7517ae6bc 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_spleen_2019_10x_madissoon_001_10.1101/741405" self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.download_website_meta = None self.organ = "spleen" self.sub_tissue = "spleen" 
self.has_celltypes = True @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/spleen.cellxgene.h5ad") + fn = os.path.join(self.path, "human", "spleen", "spleen.cellxgene.h5ad") self.adata = anndata.read(fn) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index ca36b269a..047d8e8a1 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Spleen' self.sub_tissue = 'AdultSpleen' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -63,7 +64,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleenParenchyma_1.h5ad") + fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 08126ca29..fbe16555f 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Spleen' self.sub_tissue = 'AdultSpleen' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -63,7 +64,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleen_1.h5ad") + fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach.py b/sfaira/data/human/stomach/human_stomach.py index 64d5fdd76..0121077f9 100644 --- a/sfaira/data/human/stomach/human_stomach.py +++ b/sfaira/data/human/stomach/human_stomach.py @@ -38,7 +38,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupStomach().datasets) + from sfaira_extension.data.human import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach().datasets) except ImportError: pass diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 9b994d4bb..abccb93fc 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index a4f315103..aaf8690d6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalStomach' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index 0ddfd91a3..d88265402 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index d756c8490..3d85156cc 100644 --- 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntetsine_3.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntetsine_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index 61e958ae6..da1bdd129 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalStomach' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 8895c9ff4..74f7f340b 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 
'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 368acf00a..2f44f8e40 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_5.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 320653c45..b0cd3ba45 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_3.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 623a6ccd7..638af9954 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index 2e5fcdf2b..7034a3e71 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_4.h5ad") 
+ fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thymus/human_thymus.py b/sfaira/data/human/thymus/human_thymus.py index 22e5d45fc..1e592a837 100644 --- a/sfaira/data/human/thymus/human_thymus.py +++ b/sfaira/data/human/thymus/human_thymus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupThymus().datasets) + from sfaira_extension.data.human import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus().datasets) except ImportError: pass diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index 2d2dce074..b70663e7f 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" self.download_website = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_website_meta = None self.organ = "thymus" self.sub_tissue = "fetal thymus" self.has_celltypes = True @@ -85,7 +86,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/park20.processed.h5ad") + fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 020dc1314..4aef4a0ee 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ 
b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalThymus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -55,7 +56,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_2.h5ad") + fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index c48987fc9..853191631 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalThymus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -55,7 +56,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_1.h5ad") + fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thyroid/human_thyroid.py b/sfaira/data/human/thyroid/human_thyroid.py index 39a261a91..0a8cc4e0d 100644 --- a/sfaira/data/human/thyroid/human_thyroid.py +++ b/sfaira/data/human/thyroid/human_thyroid.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupThyroid().datasets) 
+ from sfaira_extension.data.human import DatasetGroupThyroid + self.datasets.update(DatasetGroupThyroid().datasets) except ImportError: pass diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 708a50506..1dff2bf3f 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultThyroid' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_2.h5ad") + fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index db2477eea..64d7fa4da 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultThyroid' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_1.h5ad") + fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git 
a/sfaira/data/human/trachea/human_trachea.py b/sfaira/data/human/trachea/human_trachea.py index 1860bd810..6ba918535 100644 --- a/sfaira/data/human/trachea/human_trachea.py +++ b/sfaira/data/human/trachea/human_trachea.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupTrachea().datasets) + from sfaira_extension.data.human import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea().datasets) except ImportError: pass diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index d28142b4b..b263a8e5c 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultTrachea' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/trachea/hcl_AdultTrachea_2.h5ad") + fn = os.path.join(self.path, "human", "trachea", "hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/ureter/human_ureter.py b/sfaira/data/human/ureter/human_ureter.py index c7e721f78..452153557 100644 --- a/sfaira/data/human/ureter/human_ureter.py +++ b/sfaira/data/human/ureter/human_ureter.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupUreter().datasets) + from 
sfaira_extension.data.human import DatasetGroupUreter + self.datasets.update(DatasetGroupUreter().datasets) except ImportError: pass diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 170a2257c..1bf9da65a 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultUreter' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ureter/hcl_AdultUreter_1.h5ad") + fn = os.path.join(self.path, "human", "ureter", "hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/uterus/human_uterus.py b/sfaira/data/human/uterus/human_uterus.py index 742026585..4d8789bff 100644 --- a/sfaira/data/human/uterus/human_uterus.py +++ b/sfaira/data/human/uterus/human_uterus.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupUterus().datasets) + from sfaira_extension.data.human import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus().datasets) except ImportError: pass diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index eec59d718..8d00b14cf 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 
class DatasetInteractive(DatasetBase):
    """
    Dataset wrapper around an AnnData object that is already held in memory.

    In contrast to the file-backed loaders, no path is handed to DatasetBase; the supplied object is stored
    directly in ``self.adata``.
    """

    def __init__(
            self,
            data: anndata.AnnData,
            species: str,
            organ: str,
            gene_symbol_col: Union[str, None] = 'index',
            gene_ens_col: Union[str, None] = None,
            class_maps: Union[dict, None] = None,
            dataset_id: str = "interactive",
            **kwargs
    ):
        """

        :param data: AnnData instance to wrap, stored as self.adata.
        :param species: Species label, stored as self.species.
        :param organ: Organ label, stored as self.organ.
        :param gene_symbol_col: Forwarded as symbol_col to _convert_and_set_var_names() in _load().
            Presumably 'index' selects the .var index - TODO confirm against DatasetBase.
        :param gene_ens_col: Forwarded as ensembl_col to _convert_and_set_var_names() in _load().
        :param class_maps: Cell type class maps, stored as self.class_maps.
            If None, a new empty dict is created for this instance.
        :param dataset_id: Identifier of the data set, stored as self.id.
        :param kwargs: Forwarded to DatasetBase.__init__().
        """
        DatasetBase.__init__(self=self, path=None, meta_path=None, **kwargs)
        self.adata = data
        self.species = species
        self.id = dataset_id
        self.organ = organ

        self.gene_symbol_col = gene_symbol_col
        self.gene_ensg_col = gene_ens_col

        # A literal {} default would be evaluated once and shared (and mutated) across all instances,
        # so the empty map is created per instance here.
        self.class_maps = {} if class_maps is None else class_maps

    def _load(self, fn=None):
        """
        Convert and set the gene identifiers of the wrapped AnnData object.

        :param fn: Not used by this loader, the data is already in memory.
        """
        self._convert_and_set_var_names(
            symbol_col=self.gene_symbol_col,
            ensembl_col=self.gene_ensg_col,
            new_index='ensembl'
        )
/dev/null +++ b/sfaira/data/mouse/adipose/__init__.py @@ -0,0 +1 @@ +from .mouse_adipose import DatasetGroupAdipose \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/external.py b/sfaira/data/mouse/adipose/external.py similarity index 100% rename from sfaira/data/mouse/large_intestine/external.py rename to sfaira/data/mouse/adipose/external.py diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py b/sfaira/data/mouse/adipose/mouse_adipose.py similarity index 54% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py rename to sfaira/data/mouse/adipose/mouse_adipose.py index 6177b0115..67b4ba1c2 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py +++ b/sfaira/data/mouse/adipose/mouse_adipose.py @@ -3,14 +3,14 @@ from .external import DatasetGroupBase -from .mouse_peripheral_blood_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_peripheral_blood_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_peripheral_blood_2018_microwell_han_003 import Dataset as Dataset0003 -from .mouse_peripheral_blood_2018_microwell_han_004 import Dataset as Dataset0004 -from .mouse_peripheral_blood_2018_microwell_han_005 import Dataset as Dataset0005 +from .mouse_adipose_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 +from .mouse_adipose_2019_smartseq2_pisco_003 import Dataset as Dataset0004 +from .mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 -class DatasetGroupPeripheralBlood (DatasetGroupBase): +class DatasetGroupAdipose(DatasetGroupBase): def __init__( self, @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - 
self.datasets.update(sfairae.data.mouse.DatasetGroupPeripheralBlood().datasets) + from sfaira_extension.data.mouse import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py index 375b0e14a..09aea8e6c 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_004_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/Fat_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") else: raise 
ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py index 299f40343..57cc116d0 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_004_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/scat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git 
a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py index a3482f3f9..bfdf3fe1e 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py @@ -7,8 +7,6 @@ class Dataset(DatasetBase): - id: str - def __init__( self, path: Union[str, None] = None, @@ -18,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_001_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/bat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git 
a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py index 5a94230e4..46fd2d683 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_003_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/mat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py 
class Dataset(DatasetBase):
    """
    Loader for the mouse adipose smartseq2 data set "pisco_004" (doi 10.1101/661728).

    Reads the "SCAT" h5ad object, either in the "aws" or in the "figshare" flavour of the download.
    """

    id: str

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            source: str = "aws",
            **kwargs
    ):
        """

        :param path: Root directory of the raw data, forwarded to DatasetBase.__init__().
        :param meta_path: Forwarded to DatasetBase.__init__().
        :param source: Origin of the downloaded object, either "aws" or "figshare".
        :param kwargs: Forwarded to DatasetBase.__init__().
        :raises ValueError: If source is neither "aws" nor "figshare".
        """
        DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs)
        self.species = "mouse"
        self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728"
        self.source = source
        # The download location depends on which mirror the file was obtained from.
        if self.source == "aws":
            self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/"
        elif self.source == "figshare":
            self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2"
        else:
            raise ValueError("source %s not recognized" % self.source)
        self.organ = "adipose"
        self.sub_tissue = "adipose"
        self.has_celltypes = True

        self.class_maps = {
            "0": {},
        }

    def _load(self, fn=None):
        """
        Read the h5ad object into self.adata and annotate it with data set meta data.

        :param fn: File to read. If None, the file name is built from self.path and self.source.
        :raises ValueError: If neither fn nor self.path is set, or if self.source is not recognized.
        """
        if fn is None:
            if self.path is None:
                raise ValueError("provide either fn in load or path in constructor")
            if self.source == "aws":
                fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad")
            elif self.source == "figshare":
                fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad")
            else:
                raise ValueError("source %s not recognized" % self.source)
        self.adata = anndata.read_h5ad(fn)
        # NOTE(review): the nesting of the six statements below was reconstructed from a diff without
        # indentation; confirm against the repository that all of them belong to the "aws" branch.
        if self.source == "aws":
            # Replace X and var by the contents of the raw slot, then drop the raw slot.
            self.adata.X = self.adata.raw.X
            self.adata.var = self.adata.raw.var
            del self.adata.raw
            # Discard annotation shipped with the object before the meta data below is written.
            self.adata.obsm = {}
            self.adata.varm = {}
            self.adata.uns = {}

        self.adata.uns["lab"] = "Quake"
        self.adata.uns["year"] = "2019"
        self.adata.uns["doi"] = "10.1101/661728"
        self.adata.uns["protocol"] = "smartseq2"
        self.adata.uns["organ"] = self.organ
        self.adata.uns["subtissue"] = self.sub_tissue
        self.adata.uns["animal"] = "mouse"
        self.adata.uns["id"] = self.id
        self.adata.uns["wget_download"] = self.download_website
        self.adata.uns["has_celltypes"] = self.has_celltypes
        self.adata.uns["counts"] = 'norm'
        # self.adata.obs["cell_ontology_class"] is already set
        self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist()
        self.adata.obs["healthy"] = True
        self.adata.obs["state_exact"] = "healthy"

        self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl')
"mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index 03bcf78c9..3f752594a 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/bladder/tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/bladder/Bladder_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "Bladder_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index 33199af3a..624b84d65 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/bladder/tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/bladder/Bladder_facs.h5ad") + fn = 
os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/blood/__init__.py b/sfaira/data/mouse/blood/__init__.py new file mode 100644 index 000000000..6b0e27f4f --- /dev/null +++ b/sfaira/data/mouse/blood/__init__.py @@ -0,0 +1 @@ +from .mouse_blood import DatasetGroupBlood \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/external.py b/sfaira/data/mouse/blood/external.py similarity index 100% rename from sfaira/data/mouse/limb_muscle/external.py rename to sfaira/data/mouse/blood/external.py diff --git a/sfaira/data/mouse/fat/mouse_fat.py b/sfaira/data/mouse/blood/mouse_blood.py similarity index 59% rename from sfaira/data/mouse/fat/mouse_fat.py rename to sfaira/data/mouse/blood/mouse_blood.py index 670227066..6098a0870 100644 --- a/sfaira/data/mouse/fat/mouse_fat.py +++ b/sfaira/data/mouse/blood/mouse_blood.py @@ -3,14 +3,14 @@ from .external import DatasetGroupBase -from .mouse_fat_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_fat_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_fat_2019_smartseq2_pisco_002 import Dataset as Dataset0003 -from .mouse_fat_2019_smartseq2_pisco_003 import Dataset as Dataset0004 -from .mouse_fat_2019_smartseq2_pisco_004 import Dataset as Dataset0005 +from .mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 +from .mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 +from .mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 -class DatasetGroupFat(DatasetGroupBase): +class DatasetGroupBlood (DatasetGroupBase): def __init__( self, @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import 
sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupFat().datasets) + from sfaira_extension.data.mouse import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py index d467164e8..8f8b87a90 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) 
diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py index e735ba7b4..a27d7691d 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py new file mode 100644 index 000000000..9d8ef9088 --- /dev/null +++ 
class Dataset(DatasetBase):
    """
    Loader for the mouse blood microwell-seq data set "han_003" (doi 10.1016/j.cell.2018.02.001).

    Reads the count table PeripheralBlood3_dge.txt.gz and the cell annotation table MCA_CellAssignments.csv.
    """

    id: str

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            **kwargs
    ):
        """

        :param path: Root directory of the raw data, forwarded to DatasetBase.__init__().
        :param meta_path: Forwarded to DatasetBase.__init__().
        :param kwargs: Forwarded to DatasetBase.__init__().
        """
        DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs)
        self.species = "mouse"
        self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001"
        self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b"
        self.organ = "blood"
        self.sub_tissue = "blood"
        self.has_celltypes = True

        # Maps the original annotation labels of this data set to harmonised cell type names.
        self.class_maps = {
            "0": {
                'B cell_Igha high(Peripheral_Blood)': 'B cell',
                'B cell_Ly6d high(Peripheral_Blood)': 'B cell',
                'B cell_Rps27rt high(Peripheral_Blood)': 'B cell',
                'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell',
                'Basophil_Prss34 high(Peripheral_Blood)': 'basophil',
                'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell',
                'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast',
                'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast',
                'Macrophage_Ace high(Peripheral_Blood)': 'macrophage',
                'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage',
                'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage',
                'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage',
                'Monocyte_Elane high(Peripheral_Blood)': 'monocyte',
                'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte',
                'NK cell_Gzma high(Peripheral_Blood)': 'NK cell',
                'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil',
                'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil',
                'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil',
                'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil',
                'T cell_Gm14303 high(Peripheral_Blood)': 'T cell',
                'T cell_Trbc2 high(Peripheral_Blood)': 'T cell'
            },
        }

    def _load(self, fn=None):
        """
        Read the count table and the cell annotation into self.adata and add data set meta data.

        :param fn: Count table to read. If None, the file name is built from self.path.
            The annotation table is always located via self.path.
        :raises ValueError: If self.path is not set.
        """
        if self.path is None:
            if fn is None:
                raise ValueError("provide either fn in load or path in constructor")
            # The annotation table cannot be derived from fn, so a path is needed even if fn is given;
            # without this check fn_meta would be unbound below.
            raise ValueError("provide path in constructor to locate MCA_CellAssignments.csv")
        if fn is None:
            fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz")
        fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv")

        celltypes = pandas.read_csv(fn_meta, index_col=1)
        celltypes = celltypes.drop(['Unnamed: 0'], axis=1)

        data = pandas.read_csv(fn, sep=' ', header=0)
        # The table is transposed before it is wrapped into AnnData.
        self.adata = anndata.AnnData(data.T)
        # Keep only observations that have an entry in the annotation table.
        self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy()
        self.adata.obs = celltypes.loc[self.adata.obs_names, :]

        self.adata.uns["lab"] = "Guo"
        self.adata.uns["year"] = "2018"
        self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001"
        self.adata.uns["protocol"] = "microwell-seq"
        self.adata.uns["organ"] = self.organ
        self.adata.uns["subtissue"] = self.sub_tissue  # TODO
        self.adata.uns["animal"] = "mouse"
        self.adata.uns["id"] = self.id
        self.adata.uns["wget_download"] = self.download_website
        self.adata.uns["has_celltypes"] = self.has_celltypes
        self.adata.uns["counts"] = 'raw'
        self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist()
        self.set_unkown_class_id(ids=[np.nan, "nan"])
        self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist()
        self.adata.obs["healthy"] = True
        self.adata.obs["state_exact"] = "healthy"

        self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl')
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py index 0d716fc5b..f48e2108c 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, 
meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/bone/__init__.py b/sfaira/data/mouse/bone/__init__.py new file mode 100644 index 000000000..9b6ccd006 --- /dev/null +++ b/sfaira/data/mouse/bone/__init__.py @@ -0,0 +1 @@ +from .mouse_bone import DatasetGroupBone \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/external.py b/sfaira/data/mouse/bone/external.py similarity index 100% rename from sfaira/data/mouse/mammary_gland/external.py rename to sfaira/data/mouse/bone/external.py diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py b/sfaira/data/mouse/bone/mouse_bone.py similarity index 59% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py rename to sfaira/data/mouse/bone/mouse_bone.py index f37eec502..02fe69705 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py +++ b/sfaira/data/mouse/bone/mouse_bone.py @@ -3,12 
+3,12 @@ from .external import DatasetGroupBase -from .mouse_limb_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_limb_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_limb_muscle_2018_microwell_han_001 import Dataset as Dataset0003 +from .mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_bone_2018_microwell_001 import Dataset as Dataset0003 -class DatasetGroupLimbmuscle(DatasetGroupBase): +class DatasetGroupBone(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLimbmuscle().datasets) + from sfaira_extension.data.mouse import DatasetGroupBone + self.datasets.update(DatasetGroupBone().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py similarity index 91% rename from sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py rename to sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py index 2a2704d75..93695c333 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py @@ -17,9 +17,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -49,8 +49,8 @@ def _load(self, 
fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/BoneMarrow1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py similarity index 90% rename from sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py rename to sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py index 9916020f8..b6126a2f6 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,7 +26,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/marrow/tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") + fn = 
os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/marrow/Marrow_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py similarity index 90% rename from sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py index cf4e02f27..2d3dc7975 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2019_smartseq2_pisco_001_10.1101/661728" + self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,7 +26,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/marrow/tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") elif self.source == "figshare": - fn = 
os.path.join(self.path, "mouse/marrow/Marrow_facs.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/brain/mouse_brain.py b/sfaira/data/mouse/brain/mouse_brain.py index 6a4147b06..4e09daddc 100644 --- a/sfaira/data/mouse/brain/mouse_brain.py +++ b/sfaira/data/mouse/brain/mouse_brain.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupBrain().datasets) + from sfaira_extension.data.mouse import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index fd33e496b..3a692df7a 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -48,8 +48,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Brain1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index 74aaf387b..9a8d0e629 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -48,8 +48,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Brain2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 084e66ab0..0444530fd 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -38,10 +38,10 @@ def __init__( def _load(self, fn=None): if fn is None: - fn = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/matrix.mtx") - fn_barcodes = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/barcodes.tsv") - fn_var = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/genes.tsv") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/annot_fullAggr.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "matrix.mtx") + fn_barcodes = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "barcodes.tsv") + fn_var = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "genes.tsv") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "annot_fullAggr.csv") self.adata = anndata.read_mtx(fn) self.adata = anndata.AnnData(self.adata.X.T) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index 6ca638f84..f9b5f16e7 
100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/brain/tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/brain/Brain_Non-Myeloid_facs.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index 15c487af3..6aae2b9a4 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/brain/tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/brain/Brain_Myeloid_facs.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/colon/__init__.py b/sfaira/data/mouse/colon/__init__.py new file mode 100644 index 000000000..8e57ba03e --- /dev/null 
+++ b/sfaira/data/mouse/colon/__init__.py @@ -0,0 +1 @@ +from .mouse_colon import DatasetGroupColon \ No newline at end of file diff --git a/sfaira/data/mouse/marrow/external.py b/sfaira/data/mouse/colon/external.py similarity index 100% rename from sfaira/data/mouse/marrow/external.py rename to sfaira/data/mouse/colon/external.py diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine.py b/sfaira/data/mouse/colon/mouse_colon.py similarity index 60% rename from sfaira/data/mouse/large_intestine/mouse_large_intestine.py rename to sfaira/data/mouse/colon/mouse_colon.py index ebf9c14cc..3666f396a 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine.py +++ b/sfaira/data/mouse/colon/mouse_colon.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_large_intestine_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_large_intestine_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -class DatasetGroupLargeintestine(DatasetGroupBase): +class DatasetGroupColon(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLargeintestine().datasets) + from sfaira_extension.data.mouse import DatasetGroupColon + self.datasets.update(DatasetGroupColon().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py similarity index 85% rename from sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py rename to sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py index 17c8372db..17d315b2a 100644 --- 
a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_large_intestine_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "large_intestine" - self.sub_tissue = "large_intestine" + self.organ = "colon" + self.sub_tissue = "colon" self.has_celltypes = True self.class_maps = { @@ -38,11 +38,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/large_intestine/tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..299b15ede 
--- /dev/null +++ b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py @@ -0,0 +1,72 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + id: str + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" + self.source = source + if self.source == "aws": + self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + self.organ = "colon" + self.sub_tissue = "colon" + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + self.adata.uns["lab"] = "Quake" + self.adata.uns["year"] = "2019" + self.adata.uns["doi"] = "10.1101/661728" + self.adata.uns["protocol"] = "smartseq2" + self.adata.uns["organ"] = self.organ + 
self.adata.uns["subtissue"] = self.sub_tissue + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'norm' + # self.adata.obs["cell_ontology_class"] is already set + self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py index 777a341bc..f8fc4d5f6 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupDiaphragm().datasets) + from sfaira_extension.data.mouse import DatasetGroupDiaphragm + self.datasets.update(DatasetGroupDiaphragm().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 02e25518e..665983cb3 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/diaphragm/tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") + fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") elif self.source 
== "figshare": - fn = os.path.join(self.path, "mouse/diaphragm/Diaphragm_facs.h5ad") + fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/__init__.py b/sfaira/data/mouse/fat/__init__.py deleted file mode 100644 index b4ea53fe0..000000000 --- a/sfaira/data/mouse/fat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_fat import DatasetGroupFat \ No newline at end of file diff --git a/sfaira/data/mouse/femalegonad/__init__.py b/sfaira/data/mouse/femalegonad/__init__.py new file mode 100644 index 000000000..6cca0c4d4 --- /dev/null +++ b/sfaira/data/mouse/femalegonad/__init__.py @@ -0,0 +1 @@ +from .mouse_femalegonad import DatasetGroupFemalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/ovary/external.py b/sfaira/data/mouse/femalegonad/external.py similarity index 100% rename from sfaira/data/mouse/ovary/external.py rename to sfaira/data/mouse/femalegonad/external.py diff --git a/sfaira/data/mouse/testis/mouse_testis.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py similarity index 61% rename from sfaira/data/mouse/testis/mouse_testis.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad.py index 7f7586088..fc35c3ef3 100644 --- a/sfaira/data/mouse/testis/mouse_testis.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_testis_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_testis_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 -class DatasetGroupTestis(DatasetGroupBase): +class DatasetGroupFemalegonad(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions 
from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTestis().datasets) + from sfaira_extension.data.mouse import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py index 31406e47b..0729ef941 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ovary" - self.sub_tissue = "ovary" + self.organ = "femalegonad" + self.sub_tissue = "femalegonad" self.has_celltypes = True self.class_maps = { @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Ovary1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git 
a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py similarity index 90% rename from sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py index a033284b0..24b794523 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ovary" - self.sub_tissue = "ovary" + self.organ = "femalegonad" + self.sub_tissue = "femalegonad" self.has_celltypes = True self.class_maps = { @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Ovary2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/heart/mouse_heart.py b/sfaira/data/mouse/heart/mouse_heart.py index ad32d3946..ca7e6af3d 100644 --- a/sfaira/data/mouse/heart/mouse_heart.py +++ b/sfaira/data/mouse/heart/mouse_heart.py @@ -25,7 +25,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from 
extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupHeart().datasets) + from sfaira_extension.data.mouse import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index f80ec2445..d5b05893c 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/heart/Heart_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index d7f35b3ab..f5a2eb17a 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") elif self.source == "figshare": - 
fn = os.path.join(self.path, "mouse/heart/Heart_facs.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index e1e2b287e..170b815f2 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -39,7 +39,7 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") elif self.source == "figshare": raise ValueError("not defined") else: diff --git a/sfaira/data/mouse/ileum/__init__.py b/sfaira/data/mouse/ileum/__init__.py new file mode 100644 index 000000000..89c13450a --- /dev/null +++ b/sfaira/data/mouse/ileum/__init__.py @@ -0,0 +1 @@ +from .mouse_ileum import DatasetGroupIleum \ No newline at end of file diff --git a/sfaira/data/mouse/peripheral_blood/external.py b/sfaira/data/mouse/ileum/external.py similarity index 100% rename from sfaira/data/mouse/peripheral_blood/external.py rename to sfaira/data/mouse/ileum/external.py diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine.py b/sfaira/data/mouse/ileum/mouse_ileum.py similarity index 57% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine.py rename to sfaira/data/mouse/ileum/mouse_ileum.py index b7f86a1e0..f56d2c46e 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine.py +++ b/sfaira/data/mouse/ileum/mouse_ileum.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from 
.mouse_small_intestine_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_small_intestine_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_small_intestine_2018_microwell_han_003 import Dataset as Dataset0003 +from .mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 -class DatasetGroupSmallintestine(DatasetGroupBase): +class DatasetGroupIleum(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSmallintestine().datasets) + from sfaira_extension.data.mouse import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py index 6dbb2d9c2..69c3b3c91 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = 
"ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py index 2cadc4d9e..6dc73705b 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = "ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = 
os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py index 4de9bb758..3fefaf0b5 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = "ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"SmallIntestine3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney.py b/sfaira/data/mouse/kidney/mouse_kidney.py index d5e121da9..ac9d34cfc 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney.py +++ b/sfaira/data/mouse/kidney/mouse_kidney.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupKidney().datasets) + from sfaira_extension.data.mouse import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index f813d68ea..41dd1438b 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -34,8 +34,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Kidney1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index 033fa731e..7ee5882e9 100644 --- 
a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -65,8 +65,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Kidney2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index c69d74681..3cbdd2ac0 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -42,9 +42,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/kidney/tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/kidney/Kidney_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index ae9e88206..ceff6e506 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -41,9 +41,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/kidney/tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/kidney/Kidney_facs.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/large_intestine/__init__.py b/sfaira/data/mouse/large_intestine/__init__.py deleted file mode 100644 index dac54df27..000000000 --- a/sfaira/data/mouse/large_intestine/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_large_intestine import DatasetGroupLargeintestine \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 766e6a5fa..e69de29bb 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_large_intestine_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source 
== "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "large_intestine" - self.sub_tissue = "large_intestine" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/large_intestine/tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/__init__.py b/sfaira/data/mouse/limb_muscle/__init__.py deleted file mode 100644 index 9a3be10fb..000000000 --- a/sfaira/data/mouse/limb_muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_limb_muscle import DatasetGroupLimbmuscle \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index 0515158ae..e69de29bb 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_limb_muscle_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is 
None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/limb_muscle/tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/limb_muscle/Limb_Muscle_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver.py b/sfaira/data/mouse/liver/mouse_liver.py index a6df91c00..a78f4af58 100644 --- a/sfaira/data/mouse/liver/mouse_liver.py +++ b/sfaira/data/mouse/liver/mouse_liver.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from 
extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLiver().datasets) + from sfaira_extension.data.mouse import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index edd19a92e..f01bcd10b 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Liver1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 24f0a03aa..03a83405d 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Liver2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 6131ea70c..b75e14106 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/liver/tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/liver/Liver_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 245ac729d..e8750f2b0 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/liver/tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/liver/Liver_facs.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") else: raise ValueError("source %s not 
recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/lung/mouse_lung.py b/sfaira/data/mouse/lung/mouse_lung.py index ca93dd5b8..aa5a6f0ec 100644 --- a/sfaira/data/mouse/lung/mouse_lung.py +++ b/sfaira/data/mouse/lung/mouse_lung.py @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLung().datasets) + from sfaira_extension.data.mouse import DatasetGroupLung + self.datasets.update(DatasetGroupLung().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 1ca4e5964..1d198b276 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 2a8bea9df..43102d566 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in 
constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 6d0f14686..f12abcc17 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 015eb167a..3590bce81 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/lung/tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") + fn = os.path.join(self.path, "mouse", "lung", 
"tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/lung/Lung_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 10c5e48a2..db656e485 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/lung/tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/lung/Lung_facs.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/malegonad/__init__.py b/sfaira/data/mouse/malegonad/__init__.py new file mode 100644 index 000000000..a56dbc2f4 --- /dev/null +++ b/sfaira/data/mouse/malegonad/__init__.py @@ -0,0 +1 @@ +from .mouse_malegonad import DatasetGroupMalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/small_intestine/external.py b/sfaira/data/mouse/malegonad/external.py similarity index 100% rename from sfaira/data/mouse/small_intestine/external.py rename to sfaira/data/mouse/malegonad/external.py diff --git a/sfaira/data/mouse/ovary/mouse_ovary.py b/sfaira/data/mouse/malegonad/mouse_malegonad.py similarity index 61% rename from sfaira/data/mouse/ovary/mouse_ovary.py rename to 
sfaira/data/mouse/malegonad/mouse_malegonad.py index 91f6ca71b..da9610f39 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_ovary_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_ovary_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 -class DatasetGroupOvary(DatasetGroupBase): +class DatasetGroupMalegonad(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupOvary().datasets) + from sfaira_extension.data.mouse import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py rename to sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py index 31ba920d7..c6cf5653d 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "testis" - self.sub_tissue = "testis" + 
self.organ = "malegonad" + self.sub_tissue = "malegonad" self.has_celltypes = True self.class_maps = { @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Testis1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py rename to sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py index 9cc44c15b..ca536a683 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "testis" - self.sub_tissue = "testis" + self.organ = "malegonad" + self.sub_tissue = "malegonad" self.has_celltypes = True self.class_maps = { @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, 
"mouse/temp_mouse_atlas/500more_dge/Testis2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/__init__.py b/sfaira/data/mouse/mammary_gland/__init__.py deleted file mode 100644 index 0c53ff90f..000000000 --- a/sfaira/data/mouse/mammary_gland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_mammary_gland import DatasetGroupMammaryGland \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index 012bcde2c..e69de29bb 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammary_gland_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammary_gland" - 
self.sub_tissue = "mammary_gland" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/mammary_gland/tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/mammary_gland/Mammary_Gland_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammarygland/__init__.py b/sfaira/data/mouse/mammarygland/__init__.py new file mode 100644 index 
000000000..6a42b03d9 --- /dev/null +++ b/sfaira/data/mouse/mammarygland/__init__.py @@ -0,0 +1 @@ +from .mouse_mammarygland import DatasetGroupMammaryGland \ No newline at end of file diff --git a/sfaira/data/mouse/testis/external.py b/sfaira/data/mouse/mammarygland/external.py similarity index 100% rename from sfaira/data/mouse/testis/external.py rename to sfaira/data/mouse/mammarygland/external.py diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py similarity index 57% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland.py index 9374cee4e..d7cdbd797 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from .mouse_mammary_gland_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_mammary_gland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_mammary_gland_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_mammary_gland_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_mammary_gland_2018_microwell_han_003 import Dataset as Dataset0005 -from .mouse_mammary_gland_2018_microwell_han_004 import Dataset as Dataset0006 +from .mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 +from .mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 +from .mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 +from .mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 class DatasetGroupMammaryGland(DatasetGroupBase): @@ -30,7 +30,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if 
available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupMammaryGland().datasets) + from sfaira_extension.data.mouse import DatasetGroupMammaryGland + self.datasets.update(DatasetGroupMammaryGland().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py index 3ffe43c9b..0a01aff3d 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) @@ -79,4 +79,3 @@ def _load(self, fn=None): self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) - diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py index 2806abd02..661d1ff65 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", 
"temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py index 52d749e52..a23535a32 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git 
a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py index 42674693a..a677d911b 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py similarity index 87% rename from 
sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py index ed91fbb7b..f90cef8ff 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/mammary_gland/tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/mammary_gland/Mammary_Gland_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..b442c7f20 --- /dev/null +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py @@ -0,0 +1,69 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" + self.source = source + if self.source == "aws": + self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + self.adata.uns["lab"] = "Quake" + self.adata.uns["year"] = "2019" + self.adata.uns["doi"] = "10.1101/661728" + 
self.adata.uns["protocol"] = "smartseq2" + self.adata.uns["organ"] = self.organ + self.adata.uns["subtissue"] = self.sub_tissue + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'norm' + # self.adata.obs["cell_ontology_class"] is already set + self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/marrow/__init__.py b/sfaira/data/mouse/marrow/__init__.py deleted file mode 100644 index b2b203505..000000000 --- a/sfaira/data/mouse/marrow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_marrow import DatasetGroupMarrow \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/__init__.py b/sfaira/data/mouse/muscle/__init__.py new file mode 100644 index 000000000..fa8cb5cfd --- /dev/null +++ b/sfaira/data/mouse/muscle/__init__.py @@ -0,0 +1 @@ +from .mouse_muscle import DatasetGroupMuscle \ No newline at end of file diff --git a/sfaira/data/mouse/trachae/external.py b/sfaira/data/mouse/muscle/external.py similarity index 100% rename from sfaira/data/mouse/trachae/external.py rename to sfaira/data/mouse/muscle/external.py diff --git a/sfaira/data/mouse/marrow/mouse_marrow.py b/sfaira/data/mouse/muscle/mouse_muscle.py similarity index 61% rename from sfaira/data/mouse/marrow/mouse_marrow.py rename to sfaira/data/mouse/muscle/mouse_muscle.py index 4a28ce069..d3eb6f583 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow.py +++ b/sfaira/data/mouse/muscle/mouse_muscle.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from .mouse_marrow_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_marrow_2019_smartseq2_pisco_001 import Dataset as 
Dataset0002 -from .mouse_marrow_2018_microwell_001 import Dataset as Dataset0003 +from .mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 -class DatasetGroupMarrow(DatasetGroupBase): +class DatasetGroupMuscle(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupMarrow().datasets) + from sfaira_extension.data.mouse import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py rename to sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py index 7dac35d7f..ce3b6f01e 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_limb_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -51,8 +51,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise 
ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Muscle_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py similarity index 87% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py rename to sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py index f538e283c..c88b0e4a3 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_limb_muscle_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, 
"mouse/limb_muscle/tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/limb_muscle/Limb_Muscle_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py rename to sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py index a320624bd..b71e072d1 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_002_10.1101/661728" + self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") + 
fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/gat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/ovary/__init__.py b/sfaira/data/mouse/ovary/__init__.py deleted file mode 100644 index 20d9cccfc..000000000 --- a/sfaira/data/mouse/ovary/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_ovary import DatasetGroupOvary \ No newline at end of file diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas.py b/sfaira/data/mouse/pancreas/mouse_pancreas.py index 2818069a1..b036de4b3 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas.py @@ -40,7 +40,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupPancreas().datasets) + from sfaira_extension.data.mouse import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index dd6814c75..34848662f 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -56,8 +56,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Pancreas_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", 
"temp_mouse_atlas/500more_dge", "Pancreas_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 19590fe66..85ef8fb2d 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/pancreas/tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/pancreas/Pancreas_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 18c2574f7..671eba3cc 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308545_NOD_08w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308545_NOD_08w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") + fn_meta = 
os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index dc7ba1914..76df51fb8 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308547_NOD_08w_C") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308547_NOD_08w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 307ca856e..a027e8176 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308548_NOD_14w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308548_NOD_14w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 90b0a7147..5f2e685a0 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308549_NOD_14w_B") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308549_NOD_14w_B_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 18c413c0a..3b43226ec 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308550_NOD_14w_C") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308550_NOD_14w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index c3bb0281d..6735a21ad 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -47,8 +47,8 @@ def 
_load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308551_NOD_16w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308551_NOD_16w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 570a9596d..447776c5a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308552_NOD_16w_B") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308552_NOD_16w_B_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index ba12aa485..1fde77cb5 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308553_NOD_16w_C") - fn_meta = os.path.join(self.path, 
"mouse/pancreas/GSM3308553_NOD_16w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index d78535bfe..7b97fa833 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -41,9 +41,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/pancreas/tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/pancreas/Pancreas_facs.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/peripheral_blood/__init__.py b/sfaira/data/mouse/peripheral_blood/__init__.py deleted file mode 100644 index 51ba0f4ab..000000000 --- a/sfaira/data/mouse/peripheral_blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_peripheral_blood import DatasetGroupPeripheralBlood \ No newline at end of file diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index fece325ad..e69de29bb 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ 
b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -1,86 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" - self.has_celltypes = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T 
cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood5_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/mouse_placenta.py b/sfaira/data/mouse/placenta/mouse_placenta.py index 
da38d98d6..412dda22b 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta.py +++ b/sfaira/data/mouse/placenta/mouse_placenta.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupPlacenta().datasets) + from sfaira_extension.data.mouse import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 636fa1f15..71e6c67e4 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PlacentaE14.1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 076cb182b..55bf9197d 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -62,8 +62,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, 
"mouse/temp_mouse_atlas/500more_dge/PlacentaE14.2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/prostate/mouse_prostate.py b/sfaira/data/mouse/prostate/mouse_prostate.py index ebf3dd5b3..68354a363 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate.py +++ b/sfaira/data/mouse/prostate/mouse_prostate.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupProstate().datasets) + from sfaira_extension.data.mouse import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index 9ccac2ac9..99040cb76 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Prostate1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = 
celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 46ddc0a83..7f6022c77 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Prostate2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib.py b/sfaira/data/mouse/rib/mouse_rib.py index 64ec6b62f..c2a80b1a0 100644 --- a/sfaira/data/mouse/rib/mouse_rib.py +++ b/sfaira/data/mouse/rib/mouse_rib.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupRib().datasets) + from sfaira_extension.data.mouse import DatasetGroupRib + self.datasets.update(DatasetGroupRib().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index 17ab250bc..504019f64 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") 
- fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 676c15725..65718f86f 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 0268b6e81..b8d5ff7de 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib3_dge.txt.gz") - fn_meta = os.path.join(self.path, 
"mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/skin/mouse_skin.py b/sfaira/data/mouse/skin/mouse_skin.py index 3ed4c307c..b8b33a0e2 100644 --- a/sfaira/data/mouse/skin/mouse_skin.py +++ b/sfaira/data/mouse/skin/mouse_skin.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSkin().datasets) + from sfaira_extension.data.mouse import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 262885440..65b961511 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/skin/tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/skin/Skin_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index b582e2011..04ddff85c 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -38,11 +38,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/skin/Skin_facs.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/skin/tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/skin/Skin_facs.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/small_intestine/__init__.py b/sfaira/data/mouse/small_intestine/__init__.py deleted file mode 100644 index 87c890041..000000000 --- a/sfaira/data/mouse/small_intestine/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_small_intestine import DatasetGroupSmallintestine \ No newline at end of file diff --git a/sfaira/data/mouse/spleen/mouse_spleen.py b/sfaira/data/mouse/spleen/mouse_spleen.py index 43d4cd15a..27ab1559e 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen.py +++ b/sfaira/data/mouse/spleen/mouse_spleen.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSpleen().datasets) + from sfaira_extension.data.mouse import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen().datasets) except ImportError: pass diff --git 
a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 5e31c0eea..1b5174b1f 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -45,8 +45,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Spleen_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 451baa207..84118862e 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/spleen/tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/spleen/Spleen_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 9bde5f267..00ae4b975 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/spleen/tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/spleen/Spleen_facs.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/stomach/mouse_stomach.py b/sfaira/data/mouse/stomach/mouse_stomach.py index 9cee2ea17..cabeaf994 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach.py +++ b/sfaira/data/mouse/stomach/mouse_stomach.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupStomach().datasets) + from sfaira_extension.data.mouse import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 56ab7c683..64501c484 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -51,8 +51,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn 
= os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Stomach_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", "Stomach_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/testis/__init__.py b/sfaira/data/mouse/testis/__init__.py deleted file mode 100644 index cbd4fa1e7..000000000 --- a/sfaira/data/mouse/testis/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_testis import DatasetGroupTestis \ No newline at end of file diff --git a/sfaira/data/mouse/thymus/mouse_thymus.py b/sfaira/data/mouse/thymus/mouse_thymus.py index 3f7ed534f..fe707f8fd 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus.py +++ b/sfaira/data/mouse/thymus/mouse_thymus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupThymus().datasets) + from sfaira_extension.data.mouse import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index 125b2ad69..15f68b11f 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Thymus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = 
os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index 188d26cb5..f0f1e370d 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -38,9 +38,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/thymus/tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/thymus/Thymus_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index c8f960e2a..11f5971fd 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -38,9 +38,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/thymus/tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") elif self.source == 
"figshare": - fn = os.path.join(self.path, "mouse/thymus/Thymus_facs.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/tongue/mouse_tongue.py b/sfaira/data/mouse/tongue/mouse_tongue.py index 392933740..382e06f5e 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue.py +++ b/sfaira/data/mouse/tongue/mouse_tongue.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTongue().datasets) + from sfaira_extension.data.mouse import DatasetGroupTongue + self.datasets.update(DatasetGroupTongue().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 2db82fdfb..d2fcce167 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/tongue/tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/tongue/Tongue_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index a168f163e..c412c6d00 
100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/tongue/tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/tongue/Tongue_facs.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/trachae/__init__.py b/sfaira/data/mouse/trachea/__init__.py similarity index 100% rename from sfaira/data/mouse/trachae/__init__.py rename to sfaira/data/mouse/trachea/__init__.py diff --git a/sfaira/data/mouse/trachea/external.py b/sfaira/data/mouse/trachea/external.py new file mode 100644 index 000000000..9f4e3db68 --- /dev/null +++ b/sfaira/data/mouse/trachea/external.py @@ -0,0 +1 @@ +from sfaira.data import DatasetBase, DatasetGroupBase diff --git a/sfaira/data/mouse/trachae/mouse_trachea.py b/sfaira/data/mouse/trachea/mouse_trachea.py similarity index 84% rename from sfaira/data/mouse/trachae/mouse_trachea.py rename to sfaira/data/mouse/trachea/mouse_trachea.py index 2ef2426a6..bb578a632 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea.py +++ b/sfaira/data/mouse/trachea/mouse_trachea.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTrachea().datasets) + from sfaira_extension.data.mouse import DatasetGroupTrachea + 
self.datasets.update(DatasetGroupTrachea().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py similarity index 90% rename from sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py rename to sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py index df3dd8cb1..f19ec1043 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py @@ -39,11 +39,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/trachea/Trachea_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/trachea/tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/trachea/Trachea_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py similarity index 93% rename from sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py index 39b5c48c2..2e7a16097 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide 
either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/trachea/tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/trachea/Trachea_facs.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/uterus/mouse_uterus.py b/sfaira/data/mouse/uterus/mouse_uterus.py index 9b6e6e9c2..b7a7ea6fc 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus.py +++ b/sfaira/data/mouse/uterus/mouse_uterus.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupUterus().datasets) + from sfaira_extension.data.mouse import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index 5ffd95ce4..327445518 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Uterus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index db2724715..5bd723063 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Uterus2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/utils/create_meta_mouse.py b/sfaira/data/utils/create_meta_mouse.py index a56678c74..1634af953 100644 --- a/sfaira/data/utils/create_meta_mouse.py +++ b/sfaira/data/utils/create_meta_mouse.py @@ -15,29 +15,29 @@ "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "fat": mouse.DatasetGroupFat(path=path, meta_path=path_meta), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=path_meta), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=path_meta), + "colon": mouse.DatasetGroupColon(path=path, 
meta_path=path_meta), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=path_meta), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=path_meta), + "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=path_meta), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=path_meta), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=path_meta), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta) } for k in list(ds_dict.keys()): diff --git a/sfaira/data/utils/write_backed_human.py b/sfaira/data/utils/write_backed_human.py 
index a90bc8c4e..1788f5e36 100644 --- a/sfaira/data/utils/write_backed_human.py +++ b/sfaira/data/utils/write_backed_human.py @@ -1,6 +1,6 @@ import sys import tensorflow as tf -import sfaira.api as sfaira +import sfaira import os from sfaira.data import human diff --git a/sfaira/data/utils/write_backed_mouse.py b/sfaira/data/utils/write_backed_mouse.py index fc6ff9c5a..a408380e9 100644 --- a/sfaira/data/utils/write_backed_mouse.py +++ b/sfaira/data/utils/write_backed_mouse.py @@ -1,6 +1,6 @@ import sys import tensorflow as tf -import sfaira.api as sfaira +import sfaira import os from sfaira.data import mouse @@ -21,29 +21,29 @@ "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "fat": mouse.DatasetGroupFat(path=path, meta_path=path_meta), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=path_meta), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=path_meta), + "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=path_meta), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=path_meta), + "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - 
"peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=path_meta), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=path_meta), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=path_meta), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta), } ds = sfaira.data.DatasetSuperGroup( diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 08e7714d8..3e27959ef 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ from sfaira.versions.celltype_versions import SPECIES_DICT, CelltypeVersionsBase from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel +from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 188d39b05..960a091b8 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -45,7 +45,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], 
weights_md5: Union[str, None] = None, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): self.data = data self.obs_train = None @@ -78,52 +78,65 @@ def load_pretrained_weights(self): """ Loads model weights from local directory or zenodo. """ - if self.model_dir.endswith('/'): - self.model_dir += '/' - if self.model_dir.startswith('http'): # Remote repo if not os.path.exists(self.cache_path): os.makedirs(self.cache_path) import urllib.request + from urllib.parse import urljoin from urllib.error import HTTPError try: - urllib.request.urlretrieve(self.model_dir + self.model_id + '_weights.h5', - self.cache_path + self.model_id + '_weights.h5') + urllib.request.urlretrieve(self.model_dir, + os.path.join(self.cache_path, os.path.basename(self.model_dir)) + ) + fn = os.path.join(self.cache_path, os.path.basename(self.model_dir)) except HTTPError: try: - urllib.request.urlretrieve(self.model_dir + self.model_id + '_weights.data-00000-of-00001', - self.cache_path + self.model_id + '_weights.data-00000-of-00001') + urllib.request.urlretrieve(urljoin(self.model_dir, f'{self.model_id}_weights.h5'), + os.path.join(self.cache_path, f'{self.model_id}_weights.h5') + ) + fn = os.path.join(self.cache_path, f"{self.model_id}_weights.h5") except HTTPError: - raise FileNotFoundError(f'cannot find remote weightsfile: {self.model_dir + self.model_id}') - - fn = self.cache_path + self.model_id + "_weights" + try: + urllib.request.urlretrieve(urljoin(self.model_dir, f'{self.model_id}_weights.data-00000-of-00001'), + os.path.join(self.cache_path, f'{self.model_id}_weights.data-00000-of-00001') + ) + fn = os.path.join(self.cache_path, f"{self.model_id}_weights.data-00000-of-00001") + except HTTPError: + raise FileNotFoundError(f'cannot find remote weightsfile') else: # Local repo if not self.model_dir: raise ValueError('the model_id is set but the path to the model is empty') - fn = self.model_dir + self.model_id + "_weights" + if os.path.isfile(self.model_dir) \ 
+ and not self.model_dir.endswith(".h5") \ + and not self.model_dir.endswith(".data-00000-of-00001"): + raise ValueError('weights files saved in h5 format need to have an h5 file extension') + + if os.path.isfile(self.model_dir): + fn = self.model_dir + elif os.path.isfile(os.path.join(self.model_dir, f"{self.model_id}_weights.data-00000-of-00001")): + fn = os.path.join(self.model_dir, f"{self.model_id}_weights.data-00000-of-00001") + elif os.path.isfile(os.path.join(self.model_dir, f"{self.model_id}_weights.h5")): + fn = os.path.join(self.model_dir, f"{self.model_id}_weights.h5") + else: + raise ValueError('the weightsfile could not be found') - if os.path.exists(fn+'.h5'): - self._assert_md5_sum(fn+'.h5', self.md5) - self.model.training_model.load_weights(fn+'.h5') - elif os.path.exists(fn + ".data-00000-of-00001"): - self._assert_md5_sum(fn + ".data-00000-of-00001", self.md5) - self.model.training_model.load_weights(fn) - elif os.path.exists(fn): - raise ValueError('weights files saved in h5 format need to have an h5 file extension') + self._assert_md5_sum(fn, self.md5) + if fn.endswith(".data-00000-of-00001"): + self.model.training_model.load_weights(".".join(fn.split(".")[:-1])) else: - raise ValueError(f'the weightsfile {fn} could not be found') + self.model.training_model.load_weights(fn) def save_weights_to_cache(self): - if not os.path.exists(self.cache_path+'weights/'): - os.makedirs(self.cache_path+'weights/') - fn = self.cache_path + 'weights/' + str(self.model_id) + "_weights_cache.h5" + if not os.path.exists(os.path.join(self.cache_path, 'weights')): + os.makedirs(os.path.join(self.cache_path, 'weights')) + fn = os.path.join(self.cache_path, 'weights', f"{self.model_id}_weights_cache.h5") self.model.training_model.save_weights(fn) def load_weights_from_cache(self): - fn = self.cache_path + 'weights/' + str(self.model_id) + "_weights_cache.h5" + fn = os.path.join(self.cache_path, 'weights', f"{self.model_id}_weights_cache.h5") 
self.model.training_model.load_weights(fn) def init_model(self, clear_weight_cache=True, override_hyperpar=None): @@ -132,9 +145,9 @@ def init_model(self, clear_weight_cache=True, override_hyperpar=None): :return: """ if clear_weight_cache: - if os.path.exists(self.cache_path+'weights/'): - for file in os.listdir(self.cache_path+'weights/'): - file_path = os.path.join(self.cache_path+'weights/', file) + if os.path.exists(os.path.join(self.cache_path, 'weights')): + for file in os.listdir(os.path.join(self.cache_path, 'weights')): + file_path = os.path.join(os.path.join(self.cache_path, 'weights'), file) os.remove(file_path) def _assert_md5_sum( @@ -466,7 +479,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], weights_md5: Union[str, None] = None, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): super(EstimatorKerasEmbedding, self).__init__( data=data, @@ -891,7 +904,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], weights_md5: Union[str, None] = None, - cache_path: str = 'cache/', + cache_path: str = os.path.join('cache', ''), max_class_weight: float = 1e3 ): super(EstimatorKerasCelltype, self).__init__( @@ -1047,7 +1060,7 @@ def generator(): return dataset - elif mode == 'eval' or mode == 'predict': + elif mode == 'eval': weights, y = self._get_celltype_out(idx=idx) if not weighted: weights = np.ones_like(weights) @@ -1064,6 +1077,19 @@ def generator(): return x, y, weights + elif mode == 'predict': + # Prepare data reading according to whether anndata is backed or not: + if self.data.isbacked: + # Need to supply sorted indices to backed anndata: + x = self.data.X[np.sort(idx), :] + # Sort back in original order of indices. + x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] + else: + x = self._prepare_data_matrix(idx=idx) + x = x.toarray() + + return x, None, None + else: raise ValueError(f'Mode {mode} not recognised. 
Should be "train", "eval" or" predict"') @@ -1092,7 +1118,7 @@ def predict(self): prediction """ if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y, _ = self._get_dataset( + x, _, _ = self._get_dataset( idx=self.idx_test, batch_size=None, mode='predict' diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py index 9bc2dad49..5e70f72b4 100644 --- a/sfaira/interface/__init__.py +++ b/sfaira/interface/__init__.py @@ -1,2 +1 @@ from sfaira.interface.user_interface import UserInterface -from sfaira.interface.model_zoo import ModelZooEmbedding, ModelZooCelltype, ModelZoo diff --git a/sfaira/interface/external.py b/sfaira/interface/external.py index f4e9a8a8f..fdb52e721 100644 --- a/sfaira/interface/external.py +++ b/sfaira/interface/external.py @@ -1,5 +1,5 @@ from sfaira.estimators import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype -from sfaira.preprocessing import gene_filter, cell_filter, tpm_normalize import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies +from sfaira.data.interactive import DatasetInteractive diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index 98783060a..ef0e1ad70 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -7,8 +7,9 @@ import pandas as pd import os from typing import List, Union +import warnings -from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype +from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype, DatasetInteractive from .model_zoo import ModelZooEmbedding, ModelZooCelltype @@ -17,7 +18,7 @@ class UserInterface: This class performs data set handling and coordinates estimators for the different model types. 
Example code to obtain a UMAP embedding plot of the embedding created from your data with cell-type labels: ``` - import sfaira.api as sfaira + import sfaira import anndata import scanpy @@ -50,31 +51,24 @@ def __init__( self, custom_repo: Union[list, str, None] = None, sfaira_repo: bool = False, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): self.model_kipoi_embedding = None self.model_kipoi_celltype = None self.estimator_embedding = None self.estimator_celltype = None self.use_sfaira_repo = sfaira_repo - - if cache_path.endswith("/"): - self.cache_path = cache_path - else: - self.cache_path = cache_path + "/" - - if custom_repo is not None and not custom_repo.endswith("/"): - custom_repo += "/" + self.cache_path = os.path.join(cache_path, '') if sfaira_repo: # check if public sfaira repository should be accessed - self.model_lookuptable = self._load_lookuptable("https://sandbox.zenodo.org/record/647061/files/") #TODO: this still points to zenodo sandbox + self.model_lookuptable = self._load_lookuptable("https://zenodo.org/record/4304660/files/") if custom_repo: if isinstance(custom_repo, str): custom_repo = [custom_repo] for repo in custom_repo: - if os.path.exists(repo) and not os.path.exists(repo + 'model_lookuptable.csv'): + if os.path.exists(repo) and not os.path.exists(os.path.join(repo, 'model_lookuptable.csv')): self.write_lookuptable(repo) if hasattr(self, 'model_lookuptable'): @@ -89,6 +83,9 @@ def __init__( raise ValueError("please either provide a custom folder/repository with model weights or specify " "`sfaira_repo=True` to access the public weight repository") + # TODO: workaround to deal with model ids bearing file endings in model lookuptable (as is the case in first sfaira model repo upload) + self.model_lookuptable['model_id'] = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in self.model_lookuptable['model_id']] + self.zoo_embedding = ModelZooEmbedding(self.model_lookuptable) self.zoo_celltype = 
ModelZooCelltype(self.model_lookuptable) @@ -103,7 +100,7 @@ def _load_lookuptable( :param repo_path: :return: model_lookuptable """ - model_lookuptable = pd.read_csv(repo_path + 'model_lookuptable.csv', header=0, index_col=0) + model_lookuptable = pd.read_csv(os.path.join(repo_path, 'model_lookuptable.csv'), header=0, index_col=0) # check for any duplicated model_ids if hasattr(self, 'model_lookuptable'): @@ -120,50 +117,190 @@ def write_lookuptable( repo_path: str ): """ - checks if there is a txt file that lists the model_id and path of models in the directory - adds model_index that connects model_id with the link to the model - :param repo_path: :return: """ import hashlib - files = [ - os.path.join(repo_path, f) for f in os.listdir(repo_path) - if (os.path.isfile(os.path.join(repo_path, f)) - and (f.endswith('_weights.h5') or f.endswith('_weights.data-00000-of-00001')) - and (f.startswith('embedding') or f.startswith('celltype')) - ) - ] - - if files: - file_names = [f.split('/')[-1] for f in files] - s = [i.split('_')[0:7] for i in file_names] - ids = ['_'.join(i) for i in s] - md5 = [] + file_names = [] + model_paths = [] + file_paths = [] + md5 = [] + for subdir, dirs, files in os.walk(repo_path): for file in files: - with open(file, 'rb') as f: - md5.append(hashlib.md5(f.read()).hexdigest()) - + if os.path.isfile(os.path.join(subdir, file)) and ( + file.endswith('.h5') or file.endswith('.data-00000-of-00001')) and ( + file.startswith('embedding_') or file.startswith('celltype_')): + model_paths.append(os.path.join(subdir, "")) + file_paths.append(os.path.join(subdir, file)) + file_names.append(file) + with open(os.path.join(subdir, file), 'rb') as f: + md5.append(hashlib.md5(f.read()).hexdigest()) + s = [i.split('_')[0:7] for i in file_names] + ids = ['_'.join(i) for i in s] + ids_cleaned = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in ids] # remove file extensions from ids + + if ids: pd.DataFrame( - list(zip(ids, [repo_path for i in 
files], md5)), - columns=['model_id', 'model_path', 'md5'] - ).to_csv(repo_path + 'model_lookuptable.csv') + list(zip(ids_cleaned, model_paths, file_paths, md5)), + columns=['model_id', 'model_path', 'model_file_path', 'md5'] + )\ + .sort_values('model_id')\ + .reset_index(drop=True)\ + .to_csv(os.path.join(repo_path, 'model_lookuptable.csv')) else: - raise ValueError('No model weights found in {}.' + raise ValueError(f'No model weights found in {repo_path} ' 'Weights need to have .h5 or .data-00000-of-00001 extension' - 'to be recognised'.format(repo_path) + 'to be recognised' ) - def load_data( + def deposit_zenodo( self, - data: anndata.AnnData + zenodo_access_token: str, + title: str, + authors: list, + description: str, + metadata: dict = {}, + publish: bool = False, + sandbox: bool = False ): """ + Deposit all models in model lookup table on Zenodo. If publish is set to false, files will be uploaded to a + deposition draft, which can be further edited (additional metadata, files etc.). Returns the DOI link if + publish=True or a link to the deposition draft if publish=False. + + :param zenodo_access_token: Your personal Zenodo API access token. Create one here: https://zenodo.org/account/settings/applications/tokens/new/ + :param title: Title of the Zenodo deposition + :param authors: List of dicts, where each dict defines one author (dict keys: name: Name of creator in the format "Family name, Given names", affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), gnd: GND identifier of creator (optional)) + :param description: Description of the Zenodo deposition. + :param metadata: Dictionary with further metadata attributes of the deposit. See the Zenodo API reference for accepted keys: https://developers.zenodo.org/#representation + :param publish: Set this to True to directly publish the weights on Zenodo. When set to False a draft will be created, which can be edited in the browser before publishing. 
+ :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platform once created. + """ + + import requests + import json + headers = {"Content-Type": "application/json"} + params = {'access_token': zenodo_access_token} + sandbox = 'sandbox.' if sandbox else '' + + # Verify access token + r = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions', params=params) + if r.status_code != 200: + raise ValueError( + "Your Zenodo access token was not accepted by the API. Please provide a valid access token.") + + # Create empty deposition + r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions', + params=params, + json={}, + headers=headers) + + # Obtain bucket URL and deposition ID + bucket_url = r.json()["links"]["bucket"] + deposition_id = r.json()['id'] + + # Loop over files in model lookup table and upload them one by one + for i, weight_path in enumerate(self.model_lookuptable['model_file_path']): + filename = os.path.basename(weight_path) + with open(weight_path, "rb") as fp: + r = requests.put( + f"{bucket_url}/{filename}", + data=fp, + params=params, + ) + # Verify checksum after upload + if r.json()['checksum'][4:] != self.model_lookuptable['md5'][i]: + warnings.warn(f"The md5 checksum in your model_lookuptable for {self.model_lookuptable['model_id'][i]} " + f"does not match the md5 checksum of the uploaded file.") + + # Add model lookup table to zenodo + df = self.model_lookuptable.copy() + df['model_path'] = f"https://{sandbox}zenodo.org/record/{deposition_id}/files/" + df['model_file_path'] = [f"https://{sandbox}zenodo.org/record/{deposition_id}/files/{os.path.basename(f)}" for f + in self.model_lookuptable['model_file_path']] + r = requests.put( + f"{bucket_url}/model_lookuptable.csv", + data=df.to_csv(), + params=params, + ) - :return: + # Add metadata + meta_core = { + 
'title': title, + 'creators': authors, + 'description': description, + 'license': 'cc-by-4.0', + 'upload_type': 'dataset', + 'access_right': 'open' + } + meta = {**meta_core, **metadata} + r = requests.put(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}', + params=params, + data=json.dumps({ + 'metadata': meta + }), + headers=headers) + + if not publish: + print(f"Zenodo deposition draft has been created: {r.json()['links']['latest_draft_html']}") + return r.json()['links']['latest_draft_html'] + else: + # Publish the deposition + r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/actions/publish', + params=params) + if r.status_code == 202: + if sandbox: + print(f"Weights referenced in model_lookuptable have been sucessfully published on Zenodo: " + f"{r.json()['links']['latest_html']}") + return r.json()['links']['latest_html'] + else: + print(f"Weights referenced in model_lookuptable have been sucessfully published on Zenodo: " + f"{r.json()['links']['conceptdoi']}") + return r.json()['links']['conceptdoi'] + else: + try: + m = r.json()['message'] + except KeyError: + m = f"Submission failed with html status code {r.status_code}" + raise ValueError(m) + + def load_data( + self, + data: anndata.AnnData, + gene_symbol_col: Union[str, None] = None, + gene_ens_col: Union[str, None] = None + ): """ - self.data = data + Loads the provided AnnData object into sfaira. + If genes in the provided AnnData object are annotated as gene symbols, please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. + If genes in the provided AnnData object are annotated as ensembl ids, please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. + You need to provide at least one of the two. 
+ :param data: AnnData object to load + :param gene_symbol_col: Var column name (or 'index') which contains gene symbols + :param gene_ens_col: Var column name (or 'index') which contains ensembl ids + """ + if self.zoo_embedding.species is not None: + species = self.zoo_embedding.species + organ = self.zoo_embedding.organ + elif self.zoo_celltype.species is not None: + species = self.zoo_celltype.species + organ = self.zoo_celltype.organ + else: + raise ValueError("Please first set which model_id to use via the model zoo before loading the data") + + if gene_ens_col is None and gene_symbol_col is None: + raise ValueError("Please provide either the gene_ens_col or the gene_symbol_col argument.") + + dataset = DatasetInteractive( + data=data, + species=species, + organ=organ, + gene_symbol_col=gene_symbol_col, + gene_ens_col=gene_ens_col + ) + dataset.load() + self.data = dataset.adata def filter_cells(self): """ @@ -183,9 +320,7 @@ def load_model_embedding(self): :return: Model ID loaded. """ assert self.zoo_embedding.model_id is not None, "choose embedding model first" - model_dir = self.model_lookuptable.model_path[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] - if not model_dir.endswith("/"): - model_dir += "/" + model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] self.estimator_embedding = EstimatorKerasEmbedding( data=self.data, @@ -210,9 +345,7 @@ def load_model_celltype(self): :return: Model ID loaded. 
""" assert self.zoo_celltype.model_id is not None, "choose cell type model first" - model_dir = self.model_lookuptable.model_path[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] - if not model_dir.endswith("/"): - model_dir += "/" + model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] self.estimator_celltype = EstimatorKerasCelltype( data=self.data, diff --git a/sfaira/models/celltype/external.py b/sfaira/models/celltype/external.py index cbee67fd8..46629b4d4 100644 --- a/sfaira/models/celltype/external.py +++ b/sfaira/models/celltype/external.py @@ -1,3 +1,4 @@ import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel, PreprocInput +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/external.py b/sfaira/models/embedding/external.py index 7656c708e..c831cdb04 100644 --- a/sfaira/models/embedding/external.py +++ b/sfaira/models/embedding/external.py @@ -1,2 +1,4 @@ from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel, PreprocInput, MaskingDense \ No newline at end of file +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput +from sfaira.models.made import MaskingDense diff --git a/sfaira/preprocessing.py b/sfaira/preprocessing.py deleted file mode 100644 index cf12b6e20..000000000 --- a/sfaira/preprocessing.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np - -def cell_filter(): - pass - - -def gene_filter(): - pass - - -def tpm_normalize(): - pass \ No newline at end of file diff --git a/sfaira/train/external.py b/sfaira/train/external.py index 7fb82b713..158904c07 100644 --- a/sfaira/train/external.py +++ 
b/sfaira/train/external.py @@ -1,5 +1,5 @@ from sfaira.versions.celltype_versions import SPECIES_DICT from sfaira.data import DatasetGroupBase, DatasetSuperGroup from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype from sfaira.data import mouse, human diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index c42fa6779..86119aa17 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -135,7 +135,7 @@ def load_gs( :param gs_ids: :return: """ - res_dirs = [self.source_path + x + "/results/" for x in gs_ids] + res_dirs = [os.path.join(self.source_path, x, "results", "") for x in gs_ids] run_ids = [ np.sort(np.unique([ x.split("_history.pickle")[0] @@ -152,33 +152,36 @@ def load_gs( gs_keys = [] for i, indir in enumerate(res_dirs): for x in run_ids[i]: - fn_history = indir + x + "_history.pickle" + fn_history = os.path.join(indir, f"{x}_history.pickle") if os.path.isfile(fn_history): with open(fn_history, 'rb') as f: histories[x] = pickle.load(f) else: - print("file %s not found" % (x + "_history.pickle")) - fn_eval = indir + x + "_evaluation.pickle" + print(f"file {x}_history.pickle not found") + + fn_eval = os.path.join(indir, f"{x}_evaluation.pickle") if os.path.isfile(fn_eval): with open(fn_eval, 'rb') as f: evals[x] = pickle.load(f) else: - print("file %s not found" % (x + "_evaluation.pickle")) - fn_hp = indir + x + "_hyperparam.pickle" + print(f"file {x}_evaluation.pickle not found") + + fn_hp = os.path.join(indir, f"{x}_hyperparam.pickle") if os.path.isfile(fn_hp): with open(fn_hp, 'rb') as f: hyperpars[x] = pickle.load(f) else: - print("file %s not found" % (x + "_hyperparam.pickle")) - fn_mhp = indir + x + "_model_hyperparam.pickle" + print(f"file {x}_hyperparam.pickle not found") + + fn_mhp = os.path.join(indir, 
f"{x}_model_hyperparam.pickle") if os.path.isfile(fn_mhp): with open(fn_mhp, 'rb') as f: model_hyperpars[x] = pickle.load(f) else: - pass - #TODO add: print("file %s not found" % (x + "_model_hyperparam.pickle")) + print(f"file {x}_model_hyperparam.pickle not found") + run_ids_proc.append(x) - gs_keys.append(indir.split("/")[-3]) + gs_keys.append(os.path.normpath(indir).split(os.path.sep)[-2]) self.run_ids = run_ids_proc self.gs_keys = dict(zip(run_ids_proc, gs_keys)) @@ -192,7 +195,7 @@ def load_y( hat_or_true: str, run_id: str ): - fn = self.source_path + self.gs_keys[run_id] + "/results/" + run_id + f"_y{hat_or_true}.npy" + fn = os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_y{hat_or_true}.npy") return np.load(fn) def best_model_by_partition( @@ -273,7 +276,7 @@ def get_best_model_ids( if partition_select not in ["test", "val", "train"]: raise ValueError("partition %s not recognised" % partition_select) - metric_select = partition_select + "_" + metric_select + metric_select = f"{partition_select}_{metric_select}" if cv_mode.lower() == "mean": best_model = tab.groupby("run", as_index=False)[metric_select].mean().\ @@ -337,8 +340,8 @@ def save_best_weight( subset=subset, ) shutil.copyfile( - self.source_path + self.gs_keys[model_id] + "/results/" + model_id + "_weights.h5", - path + model_id + "_weights.h5" + os.path.join(self.source_path, self.gs_keys[model_id], "results", f"{model_id}_weights.h5"), + os.path.join(path, f"{model_id}_weights.h5") ) def plot_completions( @@ -438,7 +441,7 @@ def plot_best_model_by_hyperparam( for i, organ in enumerate(organs): summary_table = summary_table_param.loc[summary_table_param["organ"].values == organ, :] # Plot each metric: - ycol = partition_show + "_" + metric_select + ycol = f"{partition_show}_{metric_select}" if len(organs) == 1 and len(params) == 1: ax = np.array([ax]) sns.boxplot( @@ -506,12 +509,12 @@ def plot_training_history( ).tolist(): sns_data_temp = 
pandas.DataFrame(self.histories[run]) sns_data_temp["epoch"] = np.arange(0, sns_data_temp.shape[0]) - sns_data_temp["cv"] = run.split("_")[-1] + sns_data_temp["cv"] = int(run.split("_")[-1]) sns_data.append(sns_data_temp) sns_data = pandas.concat(sns_data, axis=0) else: cv = cv_key - sns_data = pandas.DataFrame(self.histories[model_gs_id + "_" + cv]) + sns_data = pandas.DataFrame(self.histories[f"{model_gs_id}_{cv}"]) sns_data["epoch"] = np.arange(0, sns_data.shape[0]) sns_data["cv"] = cv @@ -531,13 +534,13 @@ def plot_training_history( # metric if metric_show not in sns_data.columns: - raise ValueError("metric %s not found in %s" % (metric_show, str(sns_data.columns))) + raise ValueError(f"metric {metric_show} not found in {sns_data.columns}") sns_data_metric = pandas.concat([pandas.DataFrame({ "epoch": sns_data["epoch"].values, "cv": sns_data["cv"].values, metric_show: sns_data[metric_show].values, "partition": x - }) for i, x in enumerate([metric_show, "val_" + metric_show])]) + }) for i, x in enumerate([metric_show, f"val_{metric_show}"])]) sns.lineplot( x="epoch", y=metric_show, style="partition", hue="cv", data=sns_data_metric, ax=ax[i, 1] @@ -580,26 +583,26 @@ def write_best_hyparam( self.source_path, self.gs_keys[best_model_id], 'results', - best_model_id + best_model_id, ) else: file_path_base = os.path.join( self.source_path, - self.gs_keys[best_model_id + "_cv" + str(cvs[0])], + self.gs_keys[f"{best_model_id}_cv{cvs[0]}"], 'results', - best_model_id + "_cv" + str(cvs[0]) + f"{best_model_id}_cv{cvs[0]}", ) # Read model hyperparameter - with open(file_path_base + "_model_hyperparam.pickle", 'rb') as file: + with open(f"{file_path_base}_model_hyperparam.pickle", 'rb') as file: hyparam_model = pickle.load(file) # Read optimizer hyperparameter - with open(file_path_base + "_hyperparam.pickle", 'rb') as file: + with open(f"{file_path_base}_hyperparam.pickle", 'rb') as file: hyparam_optim = pickle.load(file) # Write both hyperparameter dicts - with 
open(os.path.join(write_path, best_model_id[:-12] + "_best_hyperparam.txt"), 'w') as file: + with open(os.path.join(write_path, f"{best_model_id[:-12]}_best_hyperparam.txt"), 'w') as file: file.write(json.dumps({"model": hyparam_model, "optimizer": hyparam_optim})) return @@ -630,7 +633,7 @@ def load_ontology_names( :param run_id: :return: """ - fn = self.source_path + self.gs_keys[run_id] + "/results/" + run_id + "_ontology_names.pickle" + fn = os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_ontology_names.pickle") if not os.path.isfile(fn): raise FileNotFoundError(f"file {run_id}_ontology_names.pickle not found") with open(fn, 'rb') as f: @@ -663,15 +666,15 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], + "model": 
["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], "model_type": [ "linear" if (id_i.split("_")[3] == "mlp" and id_i.split("_")[5].split(".")[1] == "0") else id_i.split("_")[3] @@ -705,12 +708,12 @@ def best_model_celltype( if model_id is not None: if cvs is not None: fns = [ - self.source_path + self.gs_keys[model_id + "_cv" + str(x)] + "/results/" + model_id + "_cv" + str(x) + os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") for x in cvs ] else: - fns = [self.source_path + self.gs_keys[model_id] + "/results/" + model_id] - covar = [pandas.read_csv(x + "_covar.csv") for x in fns] + fns = [os.path.join(self.source_path, self.gs_keys[model_id], "results", model_id)] + covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, covar else: return None, [None] @@ -777,7 +780,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), partition_show + "_" + metric_show + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -809,7 +812,7 @@ def plot_best( annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, - cbar_kws={'label': partition_show + "_" + metric_show}, + cbar_kws={'label': f"{partition_show}_{metric_show}"}, cmap=None ) return fig, axs, sns_data_heatmap @@ -910,12 +913,13 @@ def plot_best_classwise_heatmap( elif metric_show == "f1": m = f1(yhat, ytrue) else: - raise ValueError("did not recognize metric_show %s" % metric_show) + raise ValueError(f"did not recognize metric_show {metric_show}") vals.append(m) - sns_tab[metric_show + "_classwise"] = vals + sns_tab[f"{metric_show}_classwise"] = vals # Build figure. 
model_types = sns_tab["model_type"].unique() + model_types.sort() classes = self.load_ontology_names(run_id=sns_tab["run"].values[0]) if 'unknown' not in classes and 'Unknown' not in classes: classes = classes + ['Unknown'] @@ -923,7 +927,7 @@ def plot_best_classwise_heatmap( hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): - data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, metric_show + "_classwise"].values) + data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, f"{metric_show}_classwise"].values) if data_temp.shape[0] > 0: if self.cv: if collapse_cv == "mean": @@ -943,7 +947,7 @@ def plot_best_classwise_heatmap( if c in cell_counts.keys(): n_cells.append(np.round(cell_counts[c])) else: - warnings.warn(f"Celltype {c} from cell ontology now found in {organism} {organ} dataset") + warnings.warn(f"Celltype {c} from cell ontology not found in {organism} {organ} dataset") n_cells.append(np.nan) n_cells = np.array(n_cells)[:, None] sns_data_heatmap = pandas.DataFrame( @@ -961,7 +965,7 @@ def plot_best_classwise_heatmap( annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, - cbar_kws={'label': "test_" + metric_show}, + cbar_kws={'label': f"test_{metric_show}"}, cmap=None ) axs = sns.heatmap( @@ -1076,7 +1080,7 @@ def plot_best_classwise_scatter( else: raise ValueError("did not recognize metric_show %s" % metric_show) vals.append(m) - sns_tab[metric_show + "_classwise"] = vals + sns_tab[f"{metric_show}_classwise"] = vals # Build figure. 
model_types = sns_tab["model_type"].unique() @@ -1087,7 +1091,7 @@ def plot_best_classwise_scatter( hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): - data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, metric_show + "_classwise"].values) + data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, f"{metric_show}_classwise"].values) if data_temp.shape[0] > 0: if self.cv: if collapse_cv == "mean": @@ -1107,7 +1111,7 @@ def plot_best_classwise_scatter( if c in cell_counts.keys(): n_cells.append(np.round(cell_counts[c])) else: - warnings.warn(f"Celltype {c} from cell ontology now found in {organism} {organ} dataset") + warnings.warn(f"Celltype {c} from cell ontology not found in {organism} {organ} dataset") n_cells.append(np.nan) n_cells = np.array(n_cells)[:, None] sns_data_scatter = pandas.DataFrame( @@ -1171,23 +1175,23 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": 
[id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "model_type": [id_i.split("_")[3] for id_i in self.run_ids], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids, }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("all_" + m, [self.evals[x]["all"][m] for x in self.run_ids]) for m in metrics]).items()) + list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() else self.evals[x]["train"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() else self.evals[x]["test"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() else self.evals[x]["val"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("all_" + m, [self.evals[x]["all"][m] 
if m in self.evals[x]["all"].keys() else self.evals[x]["all"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models )) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models @@ -1223,13 +1227,13 @@ def best_model_embedding( if model_id is not None: if cvs is not None: fns = [ - self.source_path + self.gs_keys[model_id + "_cv" + str(x)] + "/results/" + model_id + "_cv" + str(x) + os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") for x in cvs ] else: - fns = [self.source_path + self.gs_keys[model_id] + "/results/" + model_id] - embedding = [np.load(x + "_embedding.npy") for x in fns] - covar = [pandas.read_csv(x + "_covar.csv") for x in fns] + fns = [os.path.join(self.source_path, self.gs_keys[model_id], "results", model_id)] + embedding = [np.load(f"{x}_embedding.npy") for x in fns] + covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, embedding, covar else: return None, [None], [None] @@ -1288,7 +1292,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), partition_show + "_" + metric_show + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -1319,7 +1323,7 @@ def plot_best( annot=True, fmt=".2f", ax=axs, xticklabels=True, yticklabels=True, - cbar_kws={'label': partition_show + "_" + metric_show} + cbar_kws={'label': f"{partition_show}_{metric_show}"} ) return fig, axs, sns_data_heatmap @@ -1362,11 +1366,11 @@ def get_gradients_by_celltype( ) # check cached file - resultspath = os.path.join(self.source_path, self.gs_keys[model_id], 'results') + resultspath = os.path.join(self.source_path, self.gs_keys[model_id], 'results', '') - if os.path.isfile(os.path.join(resultspath, model_id + '_grads.pickle')) and not ignore_cache: + if os.path.isfile(os.path.join(resultspath, 
f'{model_id}_grads.pickle')) and not ignore_cache: print('Load gradients from cached file...') - with open(os.path.join(resultspath, model_id + '_grads.pickle'), 'rb') as f: + with open(os.path.join(resultspath, f'{model_id}_grads.pickle'), 'rb') as f: gradients_raw = pickle.load(f) else: print('Compute gradients (1/3): load data') @@ -1394,12 +1398,12 @@ def get_gradients_by_celltype( model_topology=model_id.split('_')[5] ) embedding.init_model() - embedding.model.training_model.load_weights(os.path.join(resultspath, model_id + '_weights.h5')) + embedding.model.training_model.load_weights(os.path.join(resultspath, f'{model_id}_weights.h5')) # compute gradients print('Compute gradients (3/3): cumulate gradients') gradients_raw = embedding.compute_gradients_input(test_data=test_data, batch_size=256, per_celltype=True) - with open(os.path.join(resultspath, model_id + '_grads.pickle'), 'wb') as f: + with open(os.path.join(resultspath, f'{model_id}_grads.pickle'), 'wb') as f: pickle.dump(gradients_raw, f, pickle.HIGHEST_PROTOCOL) print('Gradients saved to cache file!') @@ -1581,7 +1585,6 @@ def plot_gradient_cor( if by_type: v = avg_grads[model_type[0]] celltypes_coord = celltypes[model_type[0]] - cell_names = [str(i) for i in range(v.shape[0])] cormat = pandas.DataFrame( np.corrcoef(v), index=celltypes_coord, @@ -1594,4 +1597,119 @@ def plot_gradient_cor( plt.tight_layout() if save is not None: plt.savefig(save) - plt.show() \ No newline at end of file + plt.show() + + def plot_npc( + self, + organ, + topology_version, + cvs=None + ): + """ + Plots the explained variance ration that accumulates explained variation of the latent space’s ordered + principal components. + If an embedding file is found that contains z, z_mean, z_var (eg. output from predict_variational() function) + the model will use z, and not z_mean. 
+ """ + import matplotlib.pyplot as plt + if self.summary_tab is None: + self.create_summary_tab() + models = np.unique(self.summary_tab["model_type"]).tolist() + self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] + + with plt.style.context("seaborn-whitegrid"): + plt.figure(figsize=(12, 6)) + for model in models: + model_id, embedding, covar = self.best_model_embedding( + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) + if len(embedding[0].shape) == 3: + z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z + else: + z = embedding[0] + cov = np.cov(z.T) + eig_vals, eig_vecs = np.linalg.eig(cov) + eig_sum = sum(eig_vals) + var_exp = [(i / eig_sum) for i in sorted(eig_vals, reverse=True)] + cum_var_exp = np.cumsum([0] + var_exp) + plt.step(range(0, eig_vals.shape[0]+1), cum_var_exp, where="post", linewidth=3, + label="%s cumulative explained variance (95%%: %s / 99%%: %s)" % (model, np.sum(cum_var_exp < .95), np.sum(cum_var_exp < .99))) + plt.yticks([0.0, .25, .50, .75, .95, .99]) + plt.ylabel("Explained variance ratio", fontsize=16) + plt.xlabel("Principal components", fontsize=16) + plt.legend(loc="best", fontsize=16, frameon=True) + plt.tight_layout() + plt.show() + + def plot_active_latent_units( + self, + organ, + topology_version, + cvs=None + ): + """ + Plots latent unit activity measured by empirical variance of the expected latent space. + See: https://arxiv.org/abs/1509.00519 + If an embedding file is found that contains z, z_mean, z_var (eg. output from predict_variational() function) + the model will use z, and not z_mean. 
+ """ + + colors = ['red', 'blue', 'green', 'cyan', 'magenta', 'yellow', 'darkgreen', 'lime', 'navy', 'royalblue', 'pink', 'peru'] + + def active_latent_units_mask(z): + var_x = np.diagonal(np.cov(z.T)) + min_var_x = 0.01 + active_units_mask = var_x > min_var_x + return active_units_mask + + import matplotlib.pyplot as plt + if self.summary_tab is None: + self.create_summary_tab() + models = np.unique(self.summary_tab["model_type"]).tolist() + self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] + + with plt.style.context("seaborn-whitegrid"): + plt.figure(figsize=(12, 6)) + plt.axhline(np.log(0.01), color="k", linestyle='dashed', linewidth=2, label="active unit threshold") + for i, model in enumerate(models): + model_id, embedding, covar = self.best_model_embedding( + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) + if len(embedding[0].shape) == 3: + z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z + else: + z = embedding[0] + latent_dim = z.shape[1] + var = np.sort(np.diagonal(np.cov(z.T)))[::-1] + log_var = np.log(var) + active_units = np.log(var[active_latent_units_mask(z)]) + + plt.plot(range(1,log_var.shape[0]+1), log_var, color=colors[i], alpha=1.0, linewidth=3, + label="%s active units: %i" % (model, len(active_units))) + # to plot vertical lines + log_var_cut = var.copy() + log_var_cut[~active_latent_units_mask(z)] = 0 + log_var_cut = np.log(log_var_cut) + num_active = np.argmax(log_var_cut) + if num_active > 0: + plt.vlines(num_active, ymin = -.15, ymax = 0.15, color=colors[i], linestyle='solid', linewidth=3) + if model == "vaevamp": + z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))),2) + plt.plot(range(1, int(latent_dim/2)+1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_2$ active units: %i" % (model, len(z2[z2>np.log(0.01)])), linestyle='dashed', + linewidth=3) + 
plt.plot(range(1, int(latent_dim/2)+1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_1$ active units: %i" % (model, len(z1[z1 > np.log(0.01)])), + linestyle='dotted', linewidth=3) + plt.xlabel(r'Latent unit $i$', fontsize=16) + plt.ylabel(r'$\log\,{(A_{\bf z})}_i$', fontsize=16) + plt.title(r"Latent unit activity", fontsize=16) + plt.legend(loc="upper right", frameon=True, fontsize=12) + plt.tight_layout() + plt.show() diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index e3ee5fcfd..c6125d400 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -13,6 +13,16 @@ class TargetZoos: + """ + Class that provides access to all available dataset groups in sfaira. + + Parameters + ---------- + path : str + The name of the animal + meta_path : str + The sound the animal makes + """ def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): if path is not None: @@ -20,29 +30,29 @@ def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path), "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path), - "fat": mouse.DatasetGroupFat(path=path, meta_path=meta_path), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path), "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=meta_path), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=meta_path), + "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path), "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path), "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path), "mammarygland": 
mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=meta_path), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=meta_path), + "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path), - "peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=meta_path), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path), "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path), "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=meta_path), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=meta_path), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), "uterus": mouse.DatasetGroupUterus(path=path) } self.data_human = { diff --git a/sfaira/unit_tests/external.py b/sfaira/unit_tests/external.py index 21b7266ef..6e2cfddc3 100644 --- a/sfaira/unit_tests/external.py +++ b/sfaira/unit_tests/external.py @@ -1,6 +1,6 @@ from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo, ModelZooCelltype, ModelZooEmbedding, 
UserInterface -from sfaira.preprocessing import gene_filter, cell_filter, tpm_normalize +from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.interface.user_interface import UserInterface import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies diff --git a/sfaira/unit_tests/test_models.py b/sfaira/unit_tests/test_models.py index 1a3937827..02281d44d 100644 --- a/sfaira/unit_tests/test_models.py +++ b/sfaira/unit_tests/test_models.py @@ -7,7 +7,7 @@ from sfaira.estimators.metrics import custom_mse import sfaira.models as models -from sfaira.models import BasicModel +from sfaira.models.base import BasicModel class _TestModel: diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py index 482315305..3e4990909 100644 --- a/sfaira/versions/celltype_versions/__init__.py +++ b/sfaira/versions/celltype_versions/__init__.py @@ -8,16 +8,27 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - mouse_e = sfairae.versions.celltype_versions.SPECIES_DICT["mouse"] - human_e = sfairae.versions.celltype_versions.SPECIES_DICT["human"] - for k in mouse.keys(): - if k in mouse_e.keys(): - mouse[k].celltype_universe.update(mouse_e[k]) - mouse[k].ontology.update(mouse_e[k]) - if k in mouse_e.keys(): - human[k].celltype_universe.update(human_e[k]) - human[k].ontology.update(human_e[k]) + from sfaira_extension.versions.celltype_versions import SPECIES_DICT as SPECIES_DICT_EXTENSION + + for organ in mouse.keys(): + if organ in SPECIES_DICT_EXTENSION["mouse"].keys(): + for v in SPECIES_DICT_EXTENSION["mouse"][organ].versions: + if v in mouse[organ].celltype_universe.keys(): + raise ValueError(f'Celltype version {v} already defined for mouse organ {organ} in base sfaira. 
' + f'Please define a new version in sfaira_extension.') + else: + mouse[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["mouse"][organ].celltype_universe[v] + mouse[organ].ontology[v] = SPECIES_DICT_EXTENSION["mouse"][organ].ontology[v] + + for organ in human.keys(): + if organ in SPECIES_DICT_EXTENSION["human"].keys(): + for v in SPECIES_DICT_EXTENSION["human"][organ].versions: + if v in human[organ].celltype_universe.keys(): + raise ValueError(f'Celltype version {v} already defined for human organ {organ} in base sfaira. ' + f'Please define a new version in sfaira_extension.') + else: + human[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["human"][organ].celltype_universe[v] + human[organ].ontology[v] = SPECIES_DICT_EXTENSION["human"][organ].ontology[v] except ImportError: pass diff --git a/sfaira/versions/celltype_versions/mouse/__init__.py b/sfaira/versions/celltype_versions/mouse/__init__.py index 1b764769d..e19c15aea 100644 --- a/sfaira/versions/celltype_versions/mouse/__init__.py +++ b/sfaira/versions/celltype_versions/mouse/__init__.py @@ -1,57 +1,57 @@ from .bladder import CelltypeVersionsMouseBladder from .brain import CelltypeVersionsMouseBrain from .diaphragm import CelltypeVersionsMouseDiaphragm -from .fat import CelltypeVersionsMouseFat +from .adipose import CelltypeVersionsMouseAdipose from .heart import CelltypeVersionsMouseHeart from .kidney import CelltypeVersionsMouseKidney -from .large_intestine import CelltypeVersionsMouseLargeintestine -from .limb_muscle import CelltypeVersionsMouseLimbmuscle +from .colon import CelltypeVersionsMouseColon +from .muscle import CelltypeVersionsMouseMuscle from .liver import CelltypeVersionsMouseLiver from .lung import CelltypeVersionsMouseLung -from .mammary_gland import CelltypeVersionsMouseMammarygland -from .marrow import CelltypeVersionsMouseMarrow -from .ovary import CelltypeVersionsMouseOvary -from .peripheral_blood import CelltypeVersionsMousePeripheralblood +from .mammarygland import 
CelltypeVersionsMouseMammarygland +from .bone import CelltypeVersionsMouseBone +from .femalegonad import CelltypeVersionsMouseFemalegonad +from .blood import CelltypeVersionsMouseBlood from .placenta import CelltypeVersionsMousePlacenta from .pancreas import CelltypeVersionsMousePancreas from .prostate import CelltypeVersionsMouseProstate from .rib import CelltypeVersionsMouseRib from .skin import CelltypeVersionsMouseSkin -from .small_intestine import CelltypeVersionsMouseSmallintestine +from .ileum import CelltypeVersionsMouseIleum from .spleen import CelltypeVersionsMouseSpleen from .stomach import CelltypeVersionsMouseStomach -from .testis import CelltypeVersionsMouseTestis +from .malegonad import CelltypeVersionsMouseMalegonad from .thymus import CelltypeVersionsMouseThymus from .tongue import CelltypeVersionsMouseTongue -from .trachae import CelltypeVersionsMouseTrachae +from .trachea import CelltypeVersionsMouseTrachea from .uterus import CelltypeVersionsMouseUterus ORGAN_DICT = { "bladder": CelltypeVersionsMouseBladder(), "brain": CelltypeVersionsMouseBrain(), "diaphragm": CelltypeVersionsMouseDiaphragm(), - "fat": CelltypeVersionsMouseFat(), + "adipose": CelltypeVersionsMouseAdipose(), "heart": CelltypeVersionsMouseHeart(), "kidney": CelltypeVersionsMouseKidney(), - "largeintestine": CelltypeVersionsMouseLargeintestine(), - "limbmuscle": CelltypeVersionsMouseLimbmuscle(), + "colon": CelltypeVersionsMouseColon(), + "muscle": CelltypeVersionsMouseMuscle(), "liver": CelltypeVersionsMouseLiver(), "lung": CelltypeVersionsMouseLung(), "mammarygland": CelltypeVersionsMouseMammarygland(), - "marrow": CelltypeVersionsMouseMarrow(), - "ovary": CelltypeVersionsMouseOvary(), - "peripheralblood": CelltypeVersionsMousePeripheralblood(), + "bone": CelltypeVersionsMouseBone(), + "femalegonad": CelltypeVersionsMouseFemalegonad(), + "blood": CelltypeVersionsMouseBlood(), "placenta": CelltypeVersionsMousePlacenta(), "pancreas": CelltypeVersionsMousePancreas(), "prostate": 
CelltypeVersionsMouseProstate(), "rib": CelltypeVersionsMouseRib(), "skin": CelltypeVersionsMouseSkin(), - "smallintestine": CelltypeVersionsMouseSmallintestine(), + "ileum": CelltypeVersionsMouseIleum(), "spleen": CelltypeVersionsMouseSpleen(), "stomach": CelltypeVersionsMouseStomach(), - "testis": CelltypeVersionsMouseTestis(), + "malegonad": CelltypeVersionsMouseMalegonad(), "thymus": CelltypeVersionsMouseThymus(), "tongue": CelltypeVersionsMouseTongue(), - "trachae": CelltypeVersionsMouseTrachae(), + "trachea": CelltypeVersionsMouseTrachea(), "uterus": CelltypeVersionsMouseUterus() } diff --git a/sfaira/versions/celltype_versions/mouse/fat.py b/sfaira/versions/celltype_versions/mouse/adipose.py similarity index 76% rename from sfaira/versions/celltype_versions/mouse/fat.py rename to sfaira/versions/celltype_versions/mouse/adipose.py index 8df7c99a1..5b390523d 100644 --- a/sfaira/versions/celltype_versions/mouse/fat.py +++ b/sfaira/versions/celltype_versions/mouse/adipose.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_FAT_V0 = [ +CELLTYPES_MOUSE_ADIPOSE_V0 = [ ["B cell", "CL:0000236"], ["CD4-positive, alpha-beta T cell", "nan"], ["CD8-positive, alpha-beta T cell", "nan"], @@ -14,7 +14,7 @@ ["NK cell", "CL:0000623"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_FAT_V0 = { +ONTOLOGIES_MOUSE_ADIPOSE_V0 = { "names": { "lymphocyte": [ "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", @@ -26,13 +26,13 @@ } -class CelltypeVersionsMouseFat(CelltypeVersionsBase): +class CelltypeVersionsMouseAdipose(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_FAT_V0 + "0": CELLTYPES_MOUSE_ADIPOSE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_FAT_V0 + "0": ONTOLOGIES_MOUSE_ADIPOSE_V0 } - super(CelltypeVersionsMouseFat, self).__init__(**kwargs) + super(CelltypeVersionsMouseAdipose, self).__init__(**kwargs) diff --git 
a/sfaira/versions/celltype_versions/mouse/peripheral_blood.py b/sfaira/versions/celltype_versions/mouse/blood.py similarity index 60% rename from sfaira/versions/celltype_versions/mouse/peripheral_blood.py rename to sfaira/versions/celltype_versions/mouse/blood.py index a381cd5ae..cc4613157 100644 --- a/sfaira/versions/celltype_versions/mouse/peripheral_blood.py +++ b/sfaira/versions/celltype_versions/mouse/blood.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_PERIPHERALBLOOD_V0 = [ +CELLTYPES_MOUSE_BLOOD_V0 = [ ["B cell", "CL:0000236"], ["macrophage", "CL:0000235"], ["T cell", "CL:0000084"], @@ -13,19 +13,19 @@ ["basophil", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_PERIPHERALBLOOD_V0 = { +ONTOLOGIES_MOUSE_BLOOD_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMousePeripheralblood(CelltypeVersionsBase): +class CelltypeVersionsMouseBlood(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_PERIPHERALBLOOD_V0 + "0": CELLTYPES_MOUSE_BLOOD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_PERIPHERALBLOOD_V0 + "0": ONTOLOGIES_MOUSE_BLOOD_V0 } - super(CelltypeVersionsMousePeripheralblood, self).__init__(**kwargs) + super(CelltypeVersionsMouseBlood, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/marrow.py b/sfaira/versions/celltype_versions/mouse/bone.py similarity index 83% rename from sfaira/versions/celltype_versions/mouse/marrow.py rename to sfaira/versions/celltype_versions/mouse/bone.py index a9f2f2afc..8cadbb0a2 100644 --- a/sfaira/versions/celltype_versions/mouse/marrow.py +++ b/sfaira/versions/celltype_versions/mouse/bone.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_MARROW_V0 = [ +CELLTYPES_MOUSE_BONE_V0 = [ ["basophil", "CL:0000767"], ["CD4-positive, alpha-beta T cell", "nan"], ["dendritic cell", "nan"], @@ -31,7 +31,7 @@ ["promonocyte", "CL:0000559"], ["unknown", "nan"] ] 
-ONTOLOGIES_MOUSE_MARROW_V0 = { +ONTOLOGIES_MOUSE_BONE_V0 = { "names": { "granulocyte": ["basophil", "neutrophil", "mast cell"], "mature alpha-beta T cell": ["CD4-positive, alpha-beta T cell"] @@ -40,13 +40,13 @@ } -class CelltypeVersionsMouseMarrow(CelltypeVersionsBase): +class CelltypeVersionsMouseBone(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_MARROW_V0 + "0": CELLTYPES_MOUSE_BONE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_MARROW_V0 + "0": ONTOLOGIES_MOUSE_BONE_V0 } - super(CelltypeVersionsMouseMarrow, self).__init__(**kwargs) + super(CelltypeVersionsMouseBone, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/large_intestine.py b/sfaira/versions/celltype_versions/mouse/colon.py similarity index 67% rename from sfaira/versions/celltype_versions/mouse/large_intestine.py rename to sfaira/versions/celltype_versions/mouse/colon.py index 16248610e..c901104ce 100644 --- a/sfaira/versions/celltype_versions/mouse/large_intestine.py +++ b/sfaira/versions/celltype_versions/mouse/colon.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_LARGEINTESTINE_V0 = [ +CELLTYPES_MOUSE_COLON_V0 = [ ["Brush cell of epithelium proper of large intestine", "CL:0002203"], ["enterocyte of epithelium of large intestine", "CL:0002071"], ["enteroendocrine cell", "CL:0000164"], @@ -12,19 +12,19 @@ ["secretory cell", "CL:0000151"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_LARGEINTESTINE_V0 = { +ONTOLOGIES_MOUSE_COLON_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMouseLargeintestine(CelltypeVersionsBase): +class CelltypeVersionsMouseColon(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_LARGEINTESTINE_V0 + "0": CELLTYPES_MOUSE_COLON_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_LARGEINTESTINE_V0 + "0": ONTOLOGIES_MOUSE_COLON_V0 } - super(CelltypeVersionsMouseLargeintestine, 
self).__init__(**kwargs) + super(CelltypeVersionsMouseColon, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/ovary.py b/sfaira/versions/celltype_versions/mouse/femalegonad.py similarity index 67% rename from sfaira/versions/celltype_versions/mouse/ovary.py rename to sfaira/versions/celltype_versions/mouse/femalegonad.py index b2c10f074..19278ed31 100644 --- a/sfaira/versions/celltype_versions/mouse/ovary.py +++ b/sfaira/versions/celltype_versions/mouse/femalegonad.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_OVARY_V0 = [ +CELLTYPES_MOUSE_FEMALEGONAD_V0 = [ ["cumulus cell", "nan"], ["granulosa cell", "nan"], ["large luteal cell", "nan"], @@ -13,7 +13,7 @@ ["thecal cell", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_OVARY_V0 = { +ONTOLOGIES_MOUSE_FEMALEGONAD_V0 = { "names": { 'luteal cell': ['small luteal cell', 'large luteal cell'], }, @@ -21,13 +21,13 @@ } -class CelltypeVersionsMouseOvary(CelltypeVersionsBase): +class CelltypeVersionsMouseFemalegonad(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_OVARY_V0 + "0": CELLTYPES_MOUSE_FEMALEGONAD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_OVARY_V0 + "0": ONTOLOGIES_MOUSE_FEMALEGONAD_V0 } - super(CelltypeVersionsMouseOvary, self).__init__(**kwargs) + super(CelltypeVersionsMouseFemalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/small_intestine.py b/sfaira/versions/celltype_versions/mouse/ileum.py similarity index 64% rename from sfaira/versions/celltype_versions/mouse/small_intestine.py rename to sfaira/versions/celltype_versions/mouse/ileum.py index c350d7f46..1f190bd5c 100644 --- a/sfaira/versions/celltype_versions/mouse/small_intestine.py +++ b/sfaira/versions/celltype_versions/mouse/ileum.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_SMALLINTESTINE_V0 = [ +CELLTYPES_MOUSE_ILEUM_V0 = [ ["B cell", 
"CL:0000236"], ["macrophage", "CL:0000235"], ["T cell", "CL:0000084"], @@ -15,19 +15,19 @@ ["erythroblast", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_SMALLINTESTINE_V0 = { +ONTOLOGIES_MOUSE_ILEUM_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMouseSmallintestine(CelltypeVersionsBase): +class CelltypeVersionsMouseIleum(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_SMALLINTESTINE_V0 + "0": CELLTYPES_MOUSE_ILEUM_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_SMALLINTESTINE_V0 + "0": ONTOLOGIES_MOUSE_ILEUM_V0 } - super(CelltypeVersionsMouseSmallintestine, self).__init__(**kwargs) + super(CelltypeVersionsMouseIleum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/testis.py b/sfaira/versions/celltype_versions/mouse/malegonad.py similarity index 66% rename from sfaira/versions/celltype_versions/mouse/testis.py rename to sfaira/versions/celltype_versions/mouse/malegonad.py index 78b9b8b61..f311320a7 100644 --- a/sfaira/versions/celltype_versions/mouse/testis.py +++ b/sfaira/versions/celltype_versions/mouse/malegonad.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_TESTIS_V0 = [ +CELLTYPES_MOUSE_MALEGONAD_V0 = [ ["macrophage", "CL:0000235"], ["leydig cell", "nan"], ["elongating spermatid", "nan"], @@ -14,20 +14,20 @@ ["spermatid", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_TESTIS_V0 = { +ONTOLOGIES_MOUSE_MALEGONAD_V0 = { "names": { }, "ontology_ids": {}, } -class CelltypeVersionsMouseTestis(CelltypeVersionsBase): +class CelltypeVersionsMouseMalegonad(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_TESTIS_V0 + "0": CELLTYPES_MOUSE_MALEGONAD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_TESTIS_V0 + "0": ONTOLOGIES_MOUSE_MALEGONAD_V0 } - super(CelltypeVersionsMouseTestis, self).__init__(**kwargs) + super(CelltypeVersionsMouseMalegonad, self).__init__(**kwargs) diff 
--git a/sfaira/versions/celltype_versions/mouse/mammary_gland.py b/sfaira/versions/celltype_versions/mouse/mammarygland.py similarity index 100% rename from sfaira/versions/celltype_versions/mouse/mammary_gland.py rename to sfaira/versions/celltype_versions/mouse/mammarygland.py diff --git a/sfaira/versions/celltype_versions/mouse/limb_muscle.py b/sfaira/versions/celltype_versions/mouse/muscle.py similarity index 72% rename from sfaira/versions/celltype_versions/mouse/limb_muscle.py rename to sfaira/versions/celltype_versions/mouse/muscle.py index 8ac490914..b2ffdc66d 100644 --- a/sfaira/versions/celltype_versions/mouse/limb_muscle.py +++ b/sfaira/versions/celltype_versions/mouse/muscle.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_LIMBMUSCLE_V0 = [ +CELLTYPES_MOUSE_MUSCLE_V0 = [ ["B cell", "CL:0000236"], ["dendritic cell", "nan"], ["endothelial cell", "CL:0000115"], @@ -19,20 +19,20 @@ ["T cell", "CL:0000084"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_LIMBMUSCLE_V0 = { +ONTOLOGIES_MOUSE_MUSCLE_V0 = { "names": { }, "ontology_ids": {}, } -class CelltypeVersionsMouseLimbmuscle(CelltypeVersionsBase): +class CelltypeVersionsMouseMuscle(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_LIMBMUSCLE_V0 + "0": CELLTYPES_MOUSE_MUSCLE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_LIMBMUSCLE_V0 + "0": ONTOLOGIES_MOUSE_MUSCLE_V0 } - super(CelltypeVersionsMouseLimbmuscle, self).__init__(**kwargs) + super(CelltypeVersionsMouseMuscle, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/trachae.py b/sfaira/versions/celltype_versions/mouse/trachea.py similarity index 77% rename from sfaira/versions/celltype_versions/mouse/trachae.py rename to sfaira/versions/celltype_versions/mouse/trachea.py index 68d58e0d5..57ad18f33 100644 --- a/sfaira/versions/celltype_versions/mouse/trachae.py +++ b/sfaira/versions/celltype_versions/mouse/trachea.py @@ -1,7 +1,7 @@ from 
.external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_TRACHAE_V0 = [ +CELLTYPES_MOUSE_TRACHEA_V0 = [ ["basal epithelial cell of tracheobronchial tree", "CL:0002329"], ["chondrocyte", "CL:0000138"], ["ciliated columnar cell of tracheobronchial tree", "CL:0002145"], @@ -19,7 +19,7 @@ ["T cell", "CL:0000084"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_TRACHAE_V0 = { +ONTOLOGIES_MOUSE_TRACHEA_V0 = { "names": { 'blood cell': ["granulocyte", "macrophage", "T cell"] }, @@ -27,13 +27,13 @@ } -class CelltypeVersionsMouseTrachae(CelltypeVersionsBase): +class CelltypeVersionsMouseTrachea(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_TRACHAE_V0 + "0": CELLTYPES_MOUSE_TRACHEA_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_TRACHAE_V0 + "0": ONTOLOGIES_MOUSE_TRACHEA_V0 } - super(CelltypeVersionsMouseTrachae, self).__init__(**kwargs) + super(CelltypeVersionsMouseTrachea, self).__init__(**kwargs) diff --git a/sfaira/versions/genome_versions/class_interface.py b/sfaira/versions/genome_versions/class_interface.py index a7ac20546..9b28a5994 100644 --- a/sfaira/versions/genome_versions/class_interface.py +++ b/sfaira/versions/genome_versions/class_interface.py @@ -14,11 +14,30 @@ def __init__( ): self.species = species if self.species == "human": - from .human import GenomeContainer + try: + from sfaira_extension.versions.genome_versions.human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + from .human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") + except ImportError: + from .human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") elif self.species == "mouse": - from .mouse import GenomeContainer + try: + from sfaira_extension.versions.genome_versions.mouse import GenomeContainer + if genome not in 
GenomeContainer.available_genomes: + from .mouse import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") + except ImportError: + from .mouse import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") else: - raise ValueError("species %s not recognized" % species) + raise ValueError(f"Species {species} not recognised.") + self.gc = GenomeContainer() self.set_genome(genome=genome) diff --git a/sfaira/versions/genome_versions/human/genome_container.py b/sfaira/versions/genome_versions/human/genome_container.py index 1065497f2..80e2bd8a3 100644 --- a/sfaira/versions/genome_versions/human/genome_container.py +++ b/sfaira/versions/genome_versions/human/genome_container.py @@ -5,6 +5,7 @@ class GenomeContainer: + available_genomes = ["Homo_sapiens_GRCh38_97"] def __init__(self): self.genomes = { @@ -15,4 +16,4 @@ def __init__(self): } def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) \ No newline at end of file + return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/mouse/genome_container.py b/sfaira/versions/genome_versions/mouse/genome_container.py index 7425c9421..ce9d047ab 100644 --- a/sfaira/versions/genome_versions/mouse/genome_container.py +++ b/sfaira/versions/genome_versions/mouse/genome_container.py @@ -5,6 +5,7 @@ class GenomeContainer: + available_genomes = ["Mus_musculus_GRCm38_97"] def __init__(self): self.genomes = { diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py b/sfaira/versions/topology_versions/human/celltype/celltypemarker.py index 83971dd50..a31807448 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py +++ b/sfaira/versions/topology_versions/human/celltype/celltypemarker.py @@ -15,10 
+15,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.celltype.CELLTYPEMARKER_TOPOLOGIES - for k in CELLTYPEMARKER_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMARKER_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.celltype import CELLTYPEMARKER_TOPOLOGIES as CELLTYPEMARKER_TOPOLOGIES_EXTENSION + CELLTYPEMARKER_TOPOLOGIES = { + **CELLTYPEMARKER_TOPOLOGIES, + **CELLTYPEMARKER_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py b/sfaira/versions/topology_versions/human/celltype/celltypemlp.py index e184b2cfa..827ffba91 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py +++ b/sfaira/versions/topology_versions/human/celltype/celltypemlp.py @@ -67,10 +67,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.celltype.CELLTYPEMLP_TOPOLOGIES - for k in CELLTYPEMLP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMLP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.celltype import CELLTYPEMLP_TOPOLOGIES as CELLTYPEMLP_TOPOLOGIES_EXTENSION + CELLTYPEMLP_TOPOLOGIES = { + **CELLTYPEMLP_TOPOLOGIES, + **CELLTYPEMLP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/ae.py b/sfaira/versions/topology_versions/human/embedding/ae.py index 0a4956afb..225100769 100644 --- a/sfaira/versions/topology_versions/human/embedding/ae.py +++ b/sfaira/versions/topology_versions/human/embedding/ae.py @@ -62,10 +62,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.AE_TOPOLOGIES - for k in 
AE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - AE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import AE_TOPOLOGIES as AE_TOPOLOGIES_EXTENSION + AE_TOPOLOGIES = { + **AE_TOPOLOGIES, + **AE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/linear.py b/sfaira/versions/topology_versions/human/embedding/linear.py index 5b16800a7..80f9edeca 100644 --- a/sfaira/versions/topology_versions/human/embedding/linear.py +++ b/sfaira/versions/topology_versions/human/embedding/linear.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.LINEAR_TOPOLOGIES - for k in LINEAR_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - LINEAR_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import LINEAR_TOPOLOGIES as LINEAR_TOPOLOGIES_EXTENSION + LINEAR_TOPOLOGIES = { + **LINEAR_TOPOLOGIES, + **LINEAR_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/nmf.py b/sfaira/versions/topology_versions/human/embedding/nmf.py index 74975ddd0..d006be9cb 100644 --- a/sfaira/versions/topology_versions/human/embedding/nmf.py +++ b/sfaira/versions/topology_versions/human/embedding/nmf.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.NMF_TOPOLOGIES - for k in NMF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - NMF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import NMF_TOPOLOGIES as NMF_TOPOLOGIES_EXTENSION + NMF_TOPOLOGIES = { + **NMF_TOPOLOGIES, + **NMF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git 
a/sfaira/versions/topology_versions/human/embedding/vae.py b/sfaira/versions/topology_versions/human/embedding/vae.py index 8906ace58..535a907c8 100644 --- a/sfaira/versions/topology_versions/human/embedding/vae.py +++ b/sfaira/versions/topology_versions/human/embedding/vae.py @@ -58,10 +58,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAE_TOPOLOGIES - for k in VAE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAE_TOPOLOGIES as VAE_TOPOLOGIES_EXTENSION + VAE_TOPOLOGIES = { + **VAE_TOPOLOGIES, + **VAE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py index db0f678cd..0602ac457 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py @@ -31,10 +31,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAEIAF_TOPOLOGIES - for k in VAEIAF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEIAF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAEIAF_TOPOLOGIES as VAEIAF_TOPOLOGIES_EXTENSION + VAEIAF_TOPOLOGIES = { + **VAEIAF_TOPOLOGIES, + **VAEIAF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/vaevamp.py b/sfaira/versions/topology_versions/human/embedding/vaevamp.py index 7b4d1585c..d4fff9f69 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaevamp.py +++ b/sfaira/versions/topology_versions/human/embedding/vaevamp.py @@ -7,8 +7,8 @@ "l2_coef": 0., "dropout_rate": 0., 
"batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", + "activation": "selu", + "init": "lecun_normal", "output_layer": "nb_shared_disp" } }, @@ -20,8 +20,8 @@ "l2_coef": 0., "dropout_rate": 0., "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", + "activation": "selu", + "init": "lecun_normal", "output_layer": "nb_shared_disp" } } @@ -29,10 +29,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAEVAMP_TOPOLOGIES - for k in VAEVAMP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEVAMP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAEVAMP_TOPOLOGIES as VAEVAMP_TOPOLOGIES_EXTENSION + VAEVAMP_TOPOLOGIES = { + **VAEVAMP_TOPOLOGIES, + **VAEVAMP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py b/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py index 6548bd428..8043c48ad 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py +++ b/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py @@ -15,10 +15,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.celltype.CELLTYPEMARKER_TOPOLOGIES - for k in CELLTYPEMARKER_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMARKER_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.celltype import CELLTYPEMARKER_TOPOLOGIES as CELLTYPEMARKER_TOPOLOGIES_EXTENSION + CELLTYPEMARKER_TOPOLOGIES = { + **CELLTYPEMARKER_TOPOLOGIES, + **CELLTYPEMARKER_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py b/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py 
index 9117eb2cb..97d029fb9 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py +++ b/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py @@ -67,10 +67,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.celltype.CELLTYPEMLP_TOPOLOGIES - for k in CELLTYPEMLP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMLP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.celltype import CELLTYPEMLP_TOPOLOGIES as CELLTYPEMLP_TOPOLOGIES_EXTENSION + CELLTYPEMLP_TOPOLOGIES = { + **CELLTYPEMLP_TOPOLOGIES, + **CELLTYPEMLP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/ae.py b/sfaira/versions/topology_versions/mouse/embedding/ae.py index f10ab4e4b..4c628642a 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/ae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/ae.py @@ -62,10 +62,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.AE_TOPOLOGIES - for k in AE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - AE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import AE_TOPOLOGIES as AE_TOPOLOGIES_EXTENSION + AE_TOPOLOGIES = { + **AE_TOPOLOGIES, + **AE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/linear.py b/sfaira/versions/topology_versions/mouse/embedding/linear.py index da9bff3af..cd07f0366 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/linear.py +++ b/sfaira/versions/topology_versions/mouse/embedding/linear.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = 
sfairae.versions.topology_versions.mouse.embedding.LINEAR_TOPOLOGIES - for k in LINEAR_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - LINEAR_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import LINEAR_TOPOLOGIES as LINEAR_TOPOLOGIES_EXTENSION + LINEAR_TOPOLOGIES = { + **LINEAR_TOPOLOGIES, + **LINEAR_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/nmf.py b/sfaira/versions/topology_versions/mouse/embedding/nmf.py index 913139b12..65b2b44a3 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/nmf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/nmf.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.NMF_TOPOLOGIES - for k in NMF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - NMF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import NMF_TOPOLOGIES as NMF_TOPOLOGIES_EXTENSION + NMF_TOPOLOGIES = { + **NMF_TOPOLOGIES, + **NMF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/vae.py b/sfaira/versions/topology_versions/mouse/embedding/vae.py index c23307f56..49b45b01f 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vae.py @@ -58,10 +58,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAE_TOPOLOGIES - for k in VAE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAE_TOPOLOGIES as VAE_TOPOLOGIES_EXTENSION + VAE_TOPOLOGIES = { + **VAE_TOPOLOGIES, + **VAE_TOPOLOGIES_EXTENSION 
+ } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py index 06dd826ce..d6dd458b2 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py @@ -31,10 +31,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAEIAF_TOPOLOGIES - for k in VAEIAF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEIAF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAEIAF_TOPOLOGIES as VAEIAF_TOPOLOGIES_EXTENSION + VAEIAF_TOPOLOGIES = { + **VAEIAF_TOPOLOGIES, + **VAEIAF_TOPOLOGIES_EXTENSION + } except ImportError: - pass + pass \ No newline at end of file diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py b/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py index f88b3488b..33e488224 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py @@ -29,10 +29,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAEVAMP_TOPOLOGIES - for k in VAEVAMP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEVAMP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAEVAMP_TOPOLOGIES as VAEVAMP_TOPOLOGIES_EXTENSION + VAEVAMP_TOPOLOGIES = { + **VAEVAMP_TOPOLOGIES, + **VAEVAMP_TOPOLOGIES_EXTENSION + } except ImportError: pass