diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 000000000..21892526a --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,7 @@ +### profile may optionally select or skip tests + +# (optional) list included tests here: +tests: [] + +# (optional) list skipped tests here: +skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310'] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..c08bbb0fd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a bug report to help us improve +title: 'Bug Summary' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. ... +2. ... +3. ... + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**System [please complete the following information]:** + - OS: e.g. [Ubuntu 20.04] + - Language Version: [e.g. Python 3.9] + - Virtual environment: [e.g. Conda] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..071044f83 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest a new feature +title: 'Feature Request Summary' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when ... + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/.github/ISSUE_TEMPLATE/general_question.md b/.github/ISSUE_TEMPLATE/general_question.md new file mode 100644 index 000000000..9b429f568 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/general_question.md @@ -0,0 +1,13 @@ +--- +name: General question +about: Ask a question about anything related to this project +title: 'Question' +labels: '' +assignees: '' + +--- + +**Question** + +Please ask your question here. It can be about the usage of this project, the internals, the implementation or whatever interests you. +Please use the BUG template for bugs, and the FEATURE REQUEST template for feature requests. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..8ace0c2b2 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +version: 2 +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + time: "04:00" + open-pull-requests-limit: 99 + target-branch: development + labels: + - dependabot + commit-message: + prefix: "[DEPENDABOT]" + +- package-ecosystem: github-actions + directory: "/" + schedule: + interval: daily + time: "04:00" + open-pull-requests-limit: 99 + target-branch: development + labels: + - dependabot + commit-message: + prefix: "[DEPENDABOT]" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..c017fb887 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,19 @@ +Many thanks for contributing to sfaira! + +**PR Checklist** +Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs). + + - [ ] This comment contains a description of changes (with reason) + - [ ] Referenced issue is linked + - [ ] If you've fixed a bug or added code that should be tested, add tests! 
+ - [ ] Documentation in `docs` is updated + - [ ] `docs/release-notes.rst` is updated + +**Description of changes** +Please state what you've changed and how it might affect the user. + +**Technical details** +Please state any technical details such as limitations, reasons for additional dependencies, benchmarks etc. here. + +**Additional context** +Add any other context or screenshots here. diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml new file mode 100644 index 000000000..ec199d5a6 --- /dev/null +++ b/.github/workflows/build_package.yml @@ -0,0 +1,42 @@ +name: Build sfaira Package + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + strategy: + matrix: + os: [macos-latest, ubuntu-latest, windows-latest] + python: [3.7, 3.8] + env: + PYTHONIOENCODING: utf-8 + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: ${{ matrix.python }} + + - name: Upgrade and install pip + run: python -m pip install --upgrade pip + + - name: Build sfaira + run: pip install . 
+ + - name: Import sfaira + run: python -c "import sfaira" + + # Verify that the package does adhere to PyPI's standards + - name: Install required twine packaging dependencies + run: pip install setuptools wheel twine + + - name: Build twine package + run: python setup.py sdist bdist_wheel + + - name: Check twine package + run: twine check dist/* diff --git a/.github/workflows/pr_to_master_from_patch_release_only.yml b/.github/workflows/pr_to_master_from_patch_release_only.yml new file mode 100644 index 000000000..c1d9467ab --- /dev/null +++ b/.github/workflows/pr_to_master_from_patch_release_only.yml @@ -0,0 +1,34 @@ +name: PR to master branch from patch/release branch only + +on: + pull_request: + branches: + - master + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the repository master branch are only ok if coming from any patch or release branch + - name: Check PRs + run: | + { [[ $GITHUB_HEAD_REF = *"release"* ]]; } || [[ $GITHUB_HEAD_REF == *"patch"* ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. + The `master` branch should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from any ${{github.event.pull_request.head.repo.full_name}} `release` or `patch` branch. + + You do not need to close this PR, you can change the target branch to `development` by clicking the _"Edit"_ button at the top of this page. + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml new file mode 100644 index 000000000..8fba4ee58 --- /dev/null +++ b/.github/workflows/publish_docs.yml @@ -0,0 +1,42 @@ +name: Build Documentation + +on: + push: + paths: + - "docs/**" + pull_request: + paths: + - "docs/**" + +jobs: + build: + + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.8 + + - name: Install pip + run: python -m pip install --upgrade pip + + - name: Install doc dependencies + run: pip install -r docs/requirements.txt + + - name: Build docs + run: | + cd docs + make html + + - name: Deploy + if: ${{ github.ref == 'refs/heads/master' && github.event_name == 'push' }} + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/_build/html diff --git a/.github/workflows/publish_package.yml b/.github/workflows/publish_package.yml new file mode 100644 index 000000000..409a1b10e --- /dev/null +++ b/.github/workflows/publish_package.yml @@ -0,0 +1,31 @@ +name: Publish sfaira to PyPI + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Set up Python + uses: actions/setup-python@v2.1.4 + with: + python-version: '3.9' + + - name: Install pip, setuptools, wheel, twine + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: 
${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.github/workflows/run_bandit.yml b/.github/workflows/run_bandit.yml new file mode 100644 index 000000000..95719c26f --- /dev/null +++ b/.github/workflows/run_bandit.yml @@ -0,0 +1,30 @@ +name: Run bandit + +on: + push: + paths: + - "**/*.py" + pull_request: + paths: + - "**/*.py" + +jobs: + build: + + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.9 + + - name: Install bandit + run: pip install bandit + + - name: Run bandit + run: bandit -r sfaira -c .bandit.yml diff --git a/.github/workflows/run_flake8_linting.yml b/.github/workflows/run_flake8_linting.yml new file mode 100644 index 000000000..737f7b80e --- /dev/null +++ b/.github/workflows/run_flake8_linting.yml @@ -0,0 +1,31 @@ +name: Run flake8 linting + +on: + push: + paths: + - "**/*.py" + pull_request: + paths: + - "**/*.py" + +jobs: + lint: + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.9 + + - name: Install pip + run: python -m pip install --upgrade pip + + - name: Lint with flake8 + run: | + pip install flake8 + flake8 . 
diff --git a/.gitignore b/.gitignore index 974bd87ce..362e74721 100644 --- a/.gitignore +++ b/.gitignore @@ -1,17 +1,149 @@ -sfaira/unit_tests/test_data +sfaira/unit_tests/test_data_loaders/* +sfaira/unit_tests/test_data/* +sfaira/unit_tests/test_data_template.py git abuild cache sfaira.egg-info config.ini .metadata -.idea .Rhistory playground/* venv/* -**/__pycache__ *.ipynb_checkpoints/ */*.ipynb_checkpoints/ **/.DS_Store docs/_templates/ dist/ !**/.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains IDE +.idea/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 6a8f1a146..b5013855b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,4 +1,15 @@ +version: 2 + build: image: latest + +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + python: - version: 3.8 + version: 3.9 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/README.rst b/README.rst index cf758b0c7..a116498a7 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,25 @@ -|Stars| |PyPI| |PyPIDownloads| +|Build| |Documentation| |Stars| |PyPI| |PyPIDownloads| + + +.. |Build| image:: https://github.com/theislab/sfaira/workflows/Build%20sfaira%20Package/badge.svg + :target: https://github.com/theislab/sfaira/workflows/Build%20sfaira%20Package/badge.svg + :alt: Github Workflow Build sfaira Status + +.. |Documentation| image:: https://readthedocs.org/projects/sfaira/badge/?version=latest + :target: https://sfaira.readthedocs.io/en/latest/ + :alt: Documentation Status .. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow :target: https://github.com/theislab/sfaira/stargazers + :alt: Github Stars + .. |PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI :target: https://pypi.org/project/sfaira + :alt: PyPI Version + .. 
|PyPIDownloads| image:: https://pepy.tech/badge/sfaira :target: https://pepy.tech/project/sfaira + :alt: Number of downloads sfaira - data and model repository for single-cell data @@ -15,7 +29,7 @@ sfaira - data and model repository for single-cell data :width: 1000px :align: center -sfaira_ is a model and a data repository in a single python package. +sfaira_ is a model and a data repository in a single python package (preprint_). We provide an interactive overview of the current state of the zoos on sfaira-site_. Its data zoo gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. @@ -26,6 +40,7 @@ sfaira integrates into scanpy_ workflows. .. _scanpy: https://github.com/theislab/scanpy .. _sfaira: https://sfaira.readthedocs.io +.. _preprint: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 .. _DCA: https://github.com/theislab/dca .. _scArches: https://github.com/theislab/scarches .. _sfaira-site: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..4a52c31ee --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = system_intelligence +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst new file mode 100644 index 000000000..22b767b80 --- /dev/null +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -0,0 +1,75 @@ +sfaira.data.DatasetBase +======================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetBase + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetBase.__init__ + ~DatasetBase.assert_celltype_version_key + ~DatasetBase.clear + ~DatasetBase.load + ~DatasetBase.load_meta + ~DatasetBase.load_tobacked + ~DatasetBase.map_ontology_class + ~DatasetBase.set_default_type_version + ~DatasetBase.set_unkown_class_id + ~DatasetBase.subset_organs + ~DatasetBase.write_meta + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetBase.age + ~DatasetBase.annotated + ~DatasetBase.author + ~DatasetBase.available_type_versions + ~DatasetBase.citation + ~DatasetBase.dev_stage + ~DatasetBase.doi + ~DatasetBase.doi_cleaned_id + ~DatasetBase.download + ~DatasetBase.download_meta + ~DatasetBase.ethnicity + ~DatasetBase.healthy + ~DatasetBase.healthy_state_healthy + ~DatasetBase.id + ~DatasetBase.meta + ~DatasetBase.meta_fn + ~DatasetBase.ncells + ~DatasetBase.normalization + ~DatasetBase.obs_key_age + ~DatasetBase.obs_key_cellontology_id + ~DatasetBase.obs_key_cellontology_original + ~DatasetBase.obs_key_dev_stage + ~DatasetBase.obs_key_ethnicity + ~DatasetBase.obs_key_healthy + ~DatasetBase.obs_key_organ + ~DatasetBase.obs_key_organism + ~DatasetBase.obs_key_protocol + ~DatasetBase.obs_key_sex + ~DatasetBase.obs_key_state_exact + ~DatasetBase.organ + ~DatasetBase.organism + ~DatasetBase.protocol + ~DatasetBase.sex + ~DatasetBase.source + ~DatasetBase.state_exact + ~DatasetBase.var_ensembl_col + ~DatasetBase.var_symbol_col + ~DatasetBase.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst 
b/docs/api/sfaira.data.DatasetInteractive.rst new file mode 100644 index 000000000..d1bda2a3f --- /dev/null +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -0,0 +1,75 @@ +sfaira.data.DatasetInteractive +============================== + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetInteractive + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetInteractive.__init__ + ~DatasetInteractive.assert_celltype_version_key + ~DatasetInteractive.clear + ~DatasetInteractive.load + ~DatasetInteractive.load_meta + ~DatasetInteractive.load_tobacked + ~DatasetInteractive.map_ontology_class + ~DatasetInteractive.set_default_type_version + ~DatasetInteractive.set_unkown_class_id + ~DatasetInteractive.subset_organs + ~DatasetInteractive.write_meta + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetInteractive.age + ~DatasetInteractive.annotated + ~DatasetInteractive.author + ~DatasetInteractive.available_type_versions + ~DatasetInteractive.citation + ~DatasetInteractive.dev_stage + ~DatasetInteractive.doi + ~DatasetInteractive.doi_cleaned_id + ~DatasetInteractive.download + ~DatasetInteractive.download_meta + ~DatasetInteractive.ethnicity + ~DatasetInteractive.healthy + ~DatasetInteractive.healthy_state_healthy + ~DatasetInteractive.id + ~DatasetInteractive.meta + ~DatasetInteractive.meta_fn + ~DatasetInteractive.ncells + ~DatasetInteractive.normalization + ~DatasetInteractive.obs_key_age + ~DatasetInteractive.obs_key_cellontology_id + ~DatasetInteractive.obs_key_cellontology_original + ~DatasetInteractive.obs_key_dev_stage + ~DatasetInteractive.obs_key_ethnicity + ~DatasetInteractive.obs_key_healthy + ~DatasetInteractive.obs_key_organ + ~DatasetInteractive.obs_key_organism + ~DatasetInteractive.obs_key_protocol + ~DatasetInteractive.obs_key_sex + ~DatasetInteractive.obs_key_state_exact + ~DatasetInteractive.organ + ~DatasetInteractive.organism + ~DatasetInteractive.protocol + ~DatasetInteractive.sex + 
~DatasetInteractive.source + ~DatasetInteractive.state_exact + ~DatasetInteractive.var_ensembl_col + ~DatasetInteractive.var_symbol_col + ~DatasetInteractive.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst new file mode 100644 index 000000000..cdbb18916 --- /dev/null +++ b/docs/api/sfaira.data.DatasetSuperGroup.rst @@ -0,0 +1,35 @@ +sfaira.data.DatasetSuperGroup +============================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetSuperGroup + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetSuperGroup.__init__ + ~DatasetSuperGroup.delete_backed + ~DatasetSuperGroup.extend_dataset_groups + ~DatasetSuperGroup.flatten + ~DatasetSuperGroup.get_gc + ~DatasetSuperGroup.load_all + ~DatasetSuperGroup.load_all_tobacked + ~DatasetSuperGroup.load_cached_backed + ~DatasetSuperGroup.ncells + ~DatasetSuperGroup.ncells_bydataset + ~DatasetSuperGroup.ncells_bydataset_flat + ~DatasetSuperGroup.set_dataset_groups + ~DatasetSuperGroup.subset + ~DatasetSuperGroup.subset_organs + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.data.human.rst b/docs/api/sfaira.data.human.rst new file mode 100644 index 000000000..bddab197c --- /dev/null +++ b/docs/api/sfaira.data.human.rst @@ -0,0 +1,23 @@ +sfaira.data.human +================= + +.. automodule:: sfaira.data.human + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.data.mouse.rst b/docs/api/sfaira.data.mouse.rst new file mode 100644 index 000000000..af3c07453 --- /dev/null +++ b/docs/api/sfaira.data.mouse.rst @@ -0,0 +1,23 @@ +sfaira.data.mouse +================= + +.. 
automodule:: sfaira.data.mouse + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst new file mode 100644 index 000000000..415c01979 --- /dev/null +++ b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst @@ -0,0 +1,26 @@ +sfaira.genomes.ExtractFeatureListEnsemble +========================================= + +.. currentmodule:: sfaira.genomes + +.. autoclass:: ExtractFeatureListEnsemble + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ExtractFeatureListEnsemble.__init__ + ~ExtractFeatureListEnsemble.from_ensemble_gtf + ~ExtractFeatureListEnsemble.reduce_types + ~ExtractFeatureListEnsemble.reduce_types_protein_coding + ~ExtractFeatureListEnsemble.write_gene_table_to_csv + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.models.celltype.rst b/docs/api/sfaira.models.celltype.rst new file mode 100644 index 000000000..48b1f74e2 --- /dev/null +++ b/docs/api/sfaira.models.celltype.rst @@ -0,0 +1,23 @@ +sfaira.models.celltype +====================== + +.. automodule:: sfaira.models.celltype + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.models.embedding.rst b/docs/api/sfaira.models.embedding.rst new file mode 100644 index 000000000..2446b787d --- /dev/null +++ b/docs/api/sfaira.models.embedding.rst @@ -0,0 +1,23 @@ +sfaira.models.embedding +======================= + +.. automodule:: sfaira.models.embedding + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.train.GridsearchContainer.rst b/docs/api/sfaira.train.GridsearchContainer.rst new file mode 100644 index 000000000..ae233fecf --- /dev/null +++ b/docs/api/sfaira.train.GridsearchContainer.rst @@ -0,0 +1,37 @@ +sfaira.train.GridsearchContainer +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: GridsearchContainer + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~GridsearchContainer.__init__ + ~GridsearchContainer.best_model_by_partition + ~GridsearchContainer.get_best_model_ids + ~GridsearchContainer.load_gs + ~GridsearchContainer.load_y + ~GridsearchContainer.plot_best_model_by_hyperparam + ~GridsearchContainer.plot_completions + ~GridsearchContainer.plot_training_history + ~GridsearchContainer.save_best_weight + ~GridsearchContainer.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~GridsearchContainer.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst new file mode 100644 index 000000000..eeb1cb5a9 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst @@ -0,0 +1,43 @@ +sfaira.train.SummarizeGridsearchCelltype +======================================== + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SummarizeGridsearchCelltype.__init__ + ~SummarizeGridsearchCelltype.best_model_by_partition + ~SummarizeGridsearchCelltype.best_model_celltype + ~SummarizeGridsearchCelltype.create_summary_tab + ~SummarizeGridsearchCelltype.get_best_model_ids + ~SummarizeGridsearchCelltype.load_gs + ~SummarizeGridsearchCelltype.load_ontology_names + ~SummarizeGridsearchCelltype.load_y + ~SummarizeGridsearchCelltype.plot_best + ~SummarizeGridsearchCelltype.plot_best_classwise_heatmap + ~SummarizeGridsearchCelltype.plot_best_classwise_scatter + ~SummarizeGridsearchCelltype.plot_best_model_by_hyperparam + ~SummarizeGridsearchCelltype.plot_completions + ~SummarizeGridsearchCelltype.plot_training_history + ~SummarizeGridsearchCelltype.save_best_weight + ~SummarizeGridsearchCelltype.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~SummarizeGridsearchCelltype.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst new file mode 100644 index 000000000..5ef0ddf33 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst @@ -0,0 +1,47 @@ +sfaira.train.SummarizeGridsearchEmbedding +========================================= + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SummarizeGridsearchEmbedding.__init__ + ~SummarizeGridsearchEmbedding.best_model_by_partition + ~SummarizeGridsearchEmbedding.best_model_embedding + ~SummarizeGridsearchEmbedding.create_summary_tab + ~SummarizeGridsearchEmbedding.get_best_model_ids + ~SummarizeGridsearchEmbedding.get_gradients_by_celltype + ~SummarizeGridsearchEmbedding.load_gs + ~SummarizeGridsearchEmbedding.load_y + ~SummarizeGridsearchEmbedding.plot_active_latent_units + ~SummarizeGridsearchEmbedding.plot_best + ~SummarizeGridsearchEmbedding.plot_best_model_by_hyperparam + ~SummarizeGridsearchEmbedding.plot_completions + ~SummarizeGridsearchEmbedding.plot_gradient_cor + ~SummarizeGridsearchEmbedding.plot_gradient_distr + ~SummarizeGridsearchEmbedding.plot_npc + ~SummarizeGridsearchEmbedding.plot_training_history + ~SummarizeGridsearchEmbedding.save_best_weight + ~SummarizeGridsearchEmbedding.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~SummarizeGridsearchEmbedding.List + ~SummarizeGridsearchEmbedding.Union + ~SummarizeGridsearchEmbedding.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TargetZoos.rst b/docs/api/sfaira.train.TargetZoos.rst new file mode 100644 index 000000000..bbf18cd74 --- /dev/null +++ b/docs/api/sfaira.train.TargetZoos.rst @@ -0,0 +1,24 @@ +sfaira.train.TargetZoos +======================= + +.. 
currentmodule:: sfaira.train + +.. autoclass:: TargetZoos + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TargetZoos.__init__ + ~TargetZoos.write_celltypes_tocsv_human + ~TargetZoos.write_celltypes_tocsv_mouse + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst new file mode 100644 index 000000000..cde6646e2 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelCelltype.rst @@ -0,0 +1,36 @@ +sfaira.train.TrainModelCelltype +=============================== + +.. currentmodule:: sfaira.train + +.. autoclass:: TrainModelCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelCelltype.__init__ + ~TrainModelCelltype.human_target + ~TrainModelCelltype.init_estim + ~TrainModelCelltype.mouse_target + ~TrainModelCelltype.save + ~TrainModelCelltype.save_eval + ~TrainModelCelltype.set_data + ~TrainModelCelltype.write_celltypes_tocsv_human + ~TrainModelCelltype.write_celltypes_tocsv_mouse + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~TrainModelCelltype.adata + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst new file mode 100644 index 000000000..e7c1b6be8 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelEmbedding.rst @@ -0,0 +1,36 @@ +sfaira.train.TrainModelEmbedding +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: TrainModelEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelEmbedding.__init__ + ~TrainModelEmbedding.human_target + ~TrainModelEmbedding.init_estim + ~TrainModelEmbedding.mouse_target + ~TrainModelEmbedding.save + ~TrainModelEmbedding.save_eval + ~TrainModelEmbedding.set_data + ~TrainModelEmbedding.write_celltypes_tocsv_human + ~TrainModelEmbedding.write_celltypes_tocsv_mouse + + + + + + .. 
rubric:: Attributes + + .. autosummary:: + + ~TrainModelEmbedding.adata + + \ No newline at end of file diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst new file mode 100644 index 000000000..d14d56879 --- /dev/null +++ b/docs/api/sfaira.ui.UserInterface.rst @@ -0,0 +1,38 @@ +sfaira.ui.UserInterface +======================= + +.. currentmodule:: sfaira + +.. autoclass:: ui.UserInterface + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ui.UserInterface.__init__ + ~ui.UserInterface.celltype_summary + ~ui.UserInterface.compute_all + ~ui.UserInterface.compute_all_kipoi + ~ui.UserInterface.compute_celltype + ~ui.UserInterface.compute_celltype_kipoi + ~ui.UserInterface.compute_denoised_expression + ~ui.UserInterface.compute_denoised_expression_kipoi + ~ui.UserInterface.compute_embedding + ~ui.UserInterface.compute_embedding_kipoi + ~ui.UserInterface.deposit_zenodo + ~ui.UserInterface.filter_cells + ~ui.UserInterface.get_references + ~ui.UserInterface.load_data + ~ui.UserInterface.load_model_celltype + ~ui.UserInterface.load_model_embedding + ~ui.UserInterface.write_lookuptable + + + + + + \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 000000000..598481d8d --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,21 @@ +Changelog +========== + +.. role:: small +.. role:: smaller + +This project adheres to `Semantic Versioning <https://semver.org/>`_. + +0.2.1 :small:`2020-09-7` +~~~~~~~~~~~~~~~~~~~~~~~~ + +**Added** + +* Initial release with online documentation. + +**Fixed** + +**Dependencies** + +**Deprecated** + diff --git a/docs/data.rst b/docs/data.rst index 21ac5972c..1cf1f118a 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -12,11 +12,11 @@ Build a repository structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. Choose a directory to dedicate to the data base, called root in the following. - 2. 
Make subfolders in root for each organism for which you want to build a data base. - 3. Make subfolders for each organ whithin each organism for which you want to build a data base. + 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. -We maintain a couple of download scripts that automatise this process, which have to be executed in a shell once to download specific subsets of the full data zoo. -These scripts can be found in sfaira.data.download_scripts. +Note that the automated download is a feature of sfaira but not the core purpose of the package: +Sfaira allows you to efficiently interact with such a local data repository. +Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. Use 3rd party repositories ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -31,26 +31,40 @@ Contact us for support of any other repositories. Add data sets ~~~~~~~~~~~~~ - 4. For each species and organ combination, choose the data sets that you want to use. - 5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data - using the described processing if this is required: This is usually done to speed up loading for file - formats that are difficult to access. - -Data loaders ------------- + 1. Write a data loader as outlined below. + 2. Identify the raw files as indicated in the data loader classes and copy them into your directory structure as required by your data loader. + 3. You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the dataloader contributed to sfaira would use this download link. 
Use data loaders on existing data repository -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------------------------- You only want to use data sets with existing data loaders and have adapted your directory structure as above? In that case, you can immediately start using the data loader functions, you just need to supply the root directory of the directory structure as `path to the constructor of the class that you are using. -Depending on the functionalities you want to use, you need to create a directory with data set meta data first. This -can be easily done via the data set api itself, example python scripts are under benchmarks/data_preparation. This -meta information is necessary to anticipate file sizes for backing merged adata objects for example. +Depending on the functionalities you want to use, you would often want to create a directory with cached meta data +first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to +anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. + +Write data loaders +------------------ + +The study-centric data loader module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the sfaira code, data loaders are organised into directories, which correspond to publications. +All data loaders corresponding to data sets of one study are grouped into this directory. +This directory contains an `__init__.py` file which makes these data loaders visible to sfaira: + +.. code-block:: python -Contribute data loaders -~~~~~~~~~~~~~~~~~~~~~~~ + FILE_PATH = __file__ + + +Next, each data set is represented by one data loader python file in this directory. +See below for more complex set ups with repetitive data loader code. + +The data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. 
Each such class is in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class @@ -68,14 +82,51 @@ before it is loaded into memory: meta_path: Union[str, None] = None, **kwargs ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = x # your-species - self.id = x # "organism_organ_year_protocoll_first-author_doi" - self.download_website = x # link to raw data - self.organ = x #y ourorgan - self.sub_tissue = x # sub-tissue name, otherwise organ - self.dev_stage = x # developmental stage of organism - self.has_celltypes = x # if cell type annotation is available + super().__init__(path=path, meta_path=meta_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). 
+ + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download = x # download website(s) of data files + self.download_meta = x # download website(s) of meta data files + + self.age = x # (*, optional) age of sample + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.protocol = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out. 
+ self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contain streamlined cell ontology cell type classes: + self.obs_key_cellontology_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and + # directory as the python file of this data loader (see below). # A dictionary of dictionaries with: # One item for each annotation label that is not contained in the ontology. @@ -93,34 +144,38 @@ before it is loaded into memory: .. code-block:: python def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "human", "eye", "my_data.h5ad") defined file in streamlined directory structure self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad + # Some times, you need to load multiple files (e.g. counts and annotation), all of this code would be here. 
+ - self.adata.uns["lab"] = x # load the adata.uns with meta data - self.adata.uns["year"] = x - self.adata.uns["doi"] = x - self.adata.uns["protocol"] = x # e.g. 10x, microwell, seqwell... - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = x - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - # Class expects unprocessed cell type labels in self.adata.obs["cell_ontology_class"] - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - # You can additional set self.adata.obs["cell_ontology_id"] if you have streamlined ontology IDs. This are also - # defined in the cell type universe lists. - self.adata.obs["healthy"] = x # boolean tissue sample healthy or diseased / treated - self.adata.obs["state_exact"] = x # exact tissue state as string, e.g. "tumor" or "healthy" +In summary, a simple example data loader for a mouse lung data set could look like this: - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') +.. code-block:: python + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, **kwargs) + self.author = "me" + self.doi = "my preprint" + self.download = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! 
+ self.organ = "lung" + self.organism = "mouse" + self.protocol = "smart-seq2" + self.year = "2020" + + self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here + + def _load(self, fn=None): + # assuming that i uploaded an h5ad somewhere (in self.download) + if fn is None: + fn = os.path.join(self.path, "mouse", "lung", "my.h5ad") + self.adata = anndata.read(fn) Data loaders can be added into a copy of the sfaira repository and can be used locally before they are contributed to @@ -130,43 +185,51 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. -Ontology management -------------------- +Map cell type labels to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with +the same name and directory as the python file in which the data loader is located. +This .csv contains two columns with one row for each unique cell type label and their free text identifiers in the first +column, and the corresponding ontology term in the second column. +You could write this file entirely from scratch. +Sfaira also allows you to generate a first guess of this file using fuzzy string matching via ToDo. +Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which +ontology term in the case of conflicts. +Still, this first guess usually drastically speeds up this annotation harmonization. -Sfaira maintains versioned cell type universes and ontologies by species and organ. -A cell type universe is a list of the unique, most fine-grained cell type definitions available. -These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, -an ontology ID. 
-Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality -does not allow to distinguish between T cell subtypes. -To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again -defined by a cell type identifier. -Such a hierarchy can be writted as directed acyclic graph which has the cell type universe as its leave nodes. -Intuitively, the cell type hierarchy graph depends on the cell type universe. -Accordingly, both are versioned together in sfaira: -Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an -incrementation in both of their versions. -These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the -file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available -even after updates. -This versioning without depreceation of the old objects allows sfaira to execute and train models that were designed -for older cell type universes and thus ensures reproducibility. - -Contribute cell types to ontologies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To contibute new cell types or change existing cell type universe entries, the cell type universe version has to be -incremented and the new entry can simply be added to the list or modified in the list. -We do not increment the universe version if a change does not influence the identity of a leave node with respect to -the other types in the universe, ie if it simply changes the spelling of a cell type or if an onology ID is added to -a type that previously did not have one. 
- -Contribute hierarchies to ontologies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set -of the leave nodes (cell type universe) of the corresponding universe version. +Repetitive data loader code +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are instances in which you find yourself copying code between data loader files corresponding to one study. +In most of these cases, you can avoid the copy operations and share the code more efficiently. + +If you have multiple data files which each correspond to a data set and are structured similarly, you can define a super +class which contains the shared constructor and `_load()` code, from which each data set specific loader inherits. +ToDo: Example. + +If you have a single file which contains the data from multiple data sets which belong to a data loader each, +because of different meta data or batches for example, +you can set up a `group.py` file which defines a DatasetGroup for this study, which controls the generation of Datasets. +ToDo: Example. + +Cell type ontology management +----------------------------- + +Sfaira maintains a wrapper of the Cell Ontology as a class which allows additions to this ontology. +This allows us to use the core ontology used in the community as a backbone and to keep up with newly identified cell types on our own. +We require all extensions of the core ontology not to break the directed acyclic graph that is the ontology: +Usually, such extensions would be additional leaf nodes. + +Second, we maintain cell type universes for anatomic structures. +These are dedicated for cell type-dependent models which require a defined set of cell types. +Such a universe is a set of nodes in the ontology. + +Contribute cell types to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Please open an issue on the sfaira repo with a description what type of cell type you want to add. 
Using ontologies to train cell type classifiers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -176,6 +239,17 @@ cross-entropy as a loss and aggregate accuracy as a metric. The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. +Metadata management +------------------- + +We constrain meta data by ontologies where possible. The current restrictions are: + + - .organism must be either mouse or human. + +Follow this issue_ for details on upcoming ontology integrations. + +.. _issue: https://github.com/theislab/sfaira/issues/16 + Genome management ----------------- diff --git a/docs/index.rst b/docs/index.rst index c699c8c1d..497452180 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,7 +17,7 @@ sfaira - data and model repository for single-cell data sfaira_ is a model and a data repository in a single python package. We provide an interactive overview of the current state of the zoos on sfaira-site_. -.. _sfaira: https://sfaira.readthedocs.io +.. _sfaira: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 .. _sfaira-site: https://theislab.github.io/sfaira-site/index.html .. include:: environment_brief.rst @@ -43,4 +43,4 @@ Latest additions models ecosystem roadmap - release-notes + changelog diff --git a/docs/release-latest.rst b/docs/release-latest.rst deleted file mode 100644 index 913476eb3..000000000 --- a/docs/release-latest.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. role:: small -.. role:: smaller - -0.2.1 :small:`2020-09-7` -~~~~~~~~~~~~~~~~~~~~~~~~ -Initial release with online documentation. diff --git a/docs/release-notes.rst b/docs/release-notes.rst deleted file mode 100644 index 7f8561271..000000000 --- a/docs/release-notes.rst +++ /dev/null @@ -1,11 +0,0 @@ -Release Notes -============= - -.. role:: small -.. role:: smaller - - -Version 0.2 ------------ - -.. 
include:: release-latest.rst diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..5a833b05d --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,7 @@ +sphinx +matplotlib +sphinx_rtd_theme +sphinx-autodoc-typehints +jinja2 +docutils +-r ../requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2ecab3f17..cee7a6606 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,12 +4,14 @@ jinja2 loompy numpy>=1.14.0 matplotlib +openpyxl pandas scanpy scipy seaborn sphinx -sphinx-autodoc-typehints sphinx_rtd_theme +tensorflow # TODO remove as soon as # 70 is solved tqdm requests +xlrd==1.* diff --git a/setup.cfg b/setup.cfg index 9320da5cc..2b1c2738b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,6 @@ +[bdist_wheel] +universal = 1 + [versioneer] VCS = git style = pep440 @@ -7,3 +10,15 @@ tag_prefix = [build_ext] inplace = 1 + +[flake8] +ignore=F401, W504 +exclude = docs +max-line-length = 160 + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] \ No newline at end of file diff --git a/setup.py b/setup.py index 95e4de8ce..71270b465 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,10 @@ author = 'theislab' author_email = 'david.fischer@helmholtz-muenchen.de' -description = "" +description = "sfaira is a model and a data repository for single-cell data in a single python package." 
with open("README.rst", "r") as fh: - long_description = fh.read() + long_description = fh.read() setup( name='sfaira', @@ -15,6 +15,15 @@ description=description, long_description=long_description, long_description_content_type="text/markdown", + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], packages=find_packages(), install_requires=[ 'anndata>=0.7', @@ -22,12 +31,12 @@ 'numpy>=1.16.4', 'pandas', 'scipy>=1.2.1', - 'tqdm' + 'tqdm', + 'tensorflow>=2.0.0' # TODO Remove and add to tensorflow profile ], extras_require={ 'tensorflow': [ - 'tensorflow>=2.0.0', - 'tensorflow-gpu>=2.0.0' + # 'tensorflow>=2.0.0' # TODO Add Tensorflow here again ], 'kipoi': [ 'kipoi', @@ -38,25 +47,16 @@ "matplotlib", "sklearn" ], - 'scanpy': [ - "scanpy" - ], - 'loompy': [ + 'data': [ + "scanpy", "loompy", + "requests", + "xlrd==1.*", + "openpyxl", ], 'extension': [ "sfaira_extension", ], - 'zenodo': [ - "requests" - ], - 'docs': [ - 'sphinx', - 'sphinx-autodoc-typehints', - 'sphinx_rtd_theme', - 'jinja2', - 'docutils', - ], }, version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 319b4e98f..feb4fbd59 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- """A Data and Model Zoo for Single-Cell Genomics.""" +import sfaira.interface as ui +import sfaira.train +import sfaira.models +import sfaira.genomes +import sfaira.data +import sfaira.consts as consts from ._version import get_versions __version__ = get_versions()['version'] @@ -17,10 +23,3 @@ "leander.dony@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de" ]) - -import sfaira.consts as consts -import sfaira.data -import sfaira.genomes -import 
sfaira.models -import sfaira.train -import sfaira.interface as ui diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 15d95e718..1505f6ec2 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,3 +1,3 @@ -from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED +from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.meta_data_files import META_DATA_FIELDS diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index fdac269bc..1eee5e775 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,3 +1,6 @@ +import numpy as np +from typing import List + """ The classes in this file are containers of field names and element entries that are used in streamlined adata objects in sfaira and in associated data bases. @@ -15,6 +18,7 @@ class ADATA_IDS_BASE: _cell_ontology_id: str _doi: str _download: str + _download_meta: str _dataset: str _dataset_group: str _gene_id_ensembl: str @@ -25,9 +29,8 @@ class ADATA_IDS_BASE: _ncells: str _normalization: str _organ: str + _organism: str _protocol: str - _species: str - _subtissue: str _year: str @property @@ -66,6 +69,10 @@ def doi(self) -> str: def download(self) -> str: return self._download + @property + def download_meta(self) -> str: + return self._download_meta + @property def gene_id_ensembl(self) -> str: return self._gene_id_ensembl @@ -98,21 +105,17 @@ def ncells(self) -> str: def normalization(self) -> str: return self._normalization - @property - def protocol(self) -> str: - return self._protocol - @property def organ(self) -> str: return self._organ @property - def species(self) -> str: - return self._species + def organism(self) -> str: # TODO refactor into organism + return self._organism @property - def subtissue(self) -> str: - return self._subtissue + def protocol(self) -> str: + return 
self._protocol @property def year(self) -> str: @@ -165,6 +168,7 @@ def __init__(self): self._dataset = "dataset" self._dataset_group = "dataset_group" self._download = "download" + self._download_meta = "download_meta" self._gene_id_ensembl = "ensembl" self._gene_id_index = "ensembl" self._gene_id_names = "names" @@ -173,9 +177,8 @@ def __init__(self): self._ncells = "ncells" self._normalization = "normalization" self._organ = "organ" + self._organism = "organism" self._protocol = "protocol" - self._species = "organism" - self._subtissue = "subtissue" self._year = "year" self._age = "age" @@ -184,6 +187,39 @@ def __init__(self): self._sex = "sex" self._state_exact = "state_exact" + self._load_raw = "load_raw" + self._mapped_features = "mapped_features" + self._remove_gene_version = "remove_gene_version" + + # Allowed field values: + self.age_allowed_entries = None + self.dev_stage_allowed_entries = None + self.ethnicity_allowed_entries = None + self.normalization_allowed_entries = None + self.organ_allowed_entries = None + self.organism_allowed_entries = ["mouse", "human"] + self.protocol_allowed_entries = None + self.sex_allowed_entries = None + self.subtissue_allowed_entries = None + self.year_allowed_entries = list(range(2000, 3000)) + # Free fields that are not constrained: + # _author, _download, _download_meta, _doi, _id, _state_exact + + self.unknown_celltype_name = "unknown" + self.unknown_celltype_identifiers = ["nan", "none", "unknown", np.nan, None] + + @property + def load_raw(self) -> str: + return self._load_raw + + @property + def mapped_features(self) -> str: + return self._mapped_features + + @property + def remove_gene_version(self) -> str: + return self._remove_gene_version + class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): """ @@ -192,6 +228,7 @@ class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): """ _author_names: str _disease_state_healthy: str + accepted_file_names: List[str] def __init__(self): self._cell_types_original = "free_annotation" @@ 
-201,6 +238,7 @@ def __init__(self): self._dataset = "dataset" self._dataset_group = "dataset_group" self._download = "" # TODO + self._download_meta = "" # never necessary as we interface via anndata objects self._gene_id_ensembl = "" # TODO self._gene_id_index = "ensembl" self._gene_id_names = "" # TODO @@ -208,11 +246,10 @@ def __init__(self): self._healthy = None # is inferred from _disease self._id = "" # TODO self._ncells = "ncells" - self._normalization = None # is always "counts" + self._normalization = "" # is always "raw" self._organ = "" # TODO + self._organism = "organism" self._protocol = "assay" - self._species = "organism" - self._subtissue = "" # TODO self._year = "" # TODO self._age = "age" @@ -226,6 +263,11 @@ def __init__(self): self._disease_state_healthy = "normal" self._author_names = "names" + # accepted file names + self.accepted_file_names = [ + "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad", + ] + @property def author_names(self) -> str: return self._author_names diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 8478aba28..7adfd59d8 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -2,16 +2,20 @@ The classes contains constants related to sfaira streamlined meta data files. 
""" -META_DATA_FIELDS = [ - "annotated", - "author", - "doi", - "download", - "id", - "ncells", - "normalization", - "organ", - "protocol", - "species", - "year", -] +META_DATA_FIELDS = { + "annotated": bool, + "author": str, + "cell_ontology_class": str, + "doi": str, + "download": str, + "download_meta": str, + "healthy": bool, + "id": str, + "ncells": str, + "normalization": str, + "organ": str, + "protocol": str, + "organism": str, + "state_exact": str, + "year": int, +} diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 896eb5de2..ac96ab48f 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,6 @@ -from .base import DatasetBase, DatasetGroupBase, DatasetSuperGroup -from . import mouse -from . import human +from .base import DatasetBase, DatasetBaseGroupLoading, \ + DatasetGroup, DatasetGroupDirectoryOriented, \ + DatasetSuperGroup +from . import dataloaders +from .dataloaders import DatasetSuperGroupSfaira from .interactive import DatasetInteractive diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 996dbdb94..73e071aaf 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1,81 +1,235 @@ +from __future__ import annotations + import abc import anndata import h5py +import multiprocessing import numpy as np import pandas as pd import os from os import PathLike import pandas +import pydoc import scipy.sparse -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import warnings from .external import SuperGenomeContainer from .external import ADATA_IDS_SFAIRA, META_DATA_FIELDS +UNS_STRING_META_IN_OBS = "__obs__" + + +def map_fn(inputs): + ds, formatted_version, remove_gene_version, match_to_reference, load_raw, allow_caching, func, \ + kwargs_func = inputs + try: + ds.load( + celltype_version=formatted_version, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=load_raw, + allow_caching=allow_caching, + ) + if func is not None: + x = 
func(ds, **kwargs_func) + ds.clear() + return x + else: + return None + except FileNotFoundError as e: + return ds.id, e, + class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict - meta: Union[None, pandas.DataFrame] - download_website_meta: Union[None, str] + _meta: Union[None, pandas.DataFrame] path: Union[None, str] + meta_path: Union[None, str] + cache_path: Union[None, str] id: Union[None, str] genome: Union[None, str] - _annotated: Union[None, bool] + _age: Union[None, str] _author: Union[None, str] + _dev_stage: Union[None, str] _doi: Union[None, str] - _download: Union[None, str] + _download: Union[Tuple[List[None]], Tuple[List[str]]] + _download_meta: Union[Tuple[List[None]], Tuple[List[str]]] + _ethnicity: Union[None, str] + _healthy: Union[None, bool] _id: Union[None, str] _ncells: Union[None, int] _normalization: Union[None, str] _organ: Union[None, str] + _organism: Union[None, str] _protocol: Union[None, str] - _species: Union[None, str] - _year: Union[None, str] + _sex: Union[None, str] + _source: Union[None, str] + _state_exact: Union[None, str] + _year: Union[None, int] + + _obs_key_age: Union[None, str] + _obs_key_cellontology_id: Union[None, str] + _obs_key_cellontology_original: Union[None, str] + _obs_key_dev_stage: Union[None, str] + _obs_key_ethnicity: Union[None, str] + _obs_key_healthy: Union[None, str] + _obs_key_healthy: Union[None, str] + _obs_key_organ: Union[None, str] + _obs_key_organism: Union[None, str] + _obs_key_protocol: Union[None, str] + _obs_key_sex: Union[None, str] + _obs_key_state_exact: Union[None, str] + + _healthy_state_healthy: Union[None, str] + + _var_symbol_col: Union[None, str] + _var_ensembl_col: Union[None, str] def __init__( self, path: Union[str, None] = None, meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, **kwargs ): + self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + self._META_DATA_FIELDS = META_DATA_FIELDS + self.adata = None - self.download_website_meta = 
None self.meta = None self.genome = None self.path = path self.meta_path = meta_path - self._load_raw = None + self.cache_path = cache_path - self._annotated = None + self._age = None self._author = None + self._dev_stage = None self._doi = None self._download = None + self._download_meta = None + self._ethnicity = None + self._healthy = None self._id = None self._ncells = None self._normalization = None self._organ = None + self._organism = None self._protocol = None - self._species = None + self._sex = None + self._source = None + self._state_exact = None self._year = None - self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - self._META_DATA_FIELDS = META_DATA_FIELDS + self._obs_key_age = None + self._obs_key_cellontology_id = None + self._obs_key_cellontology_original = None + self._obs_key_dev_stage = None + self._obs_key_ethnicity = None + self._obs_key_healthy = None + self._obs_key_organ = None + self._obs_key_organism = None + self._obs_key_protocol = None + self._obs_key_sex = None + self._obs_key_state_exact = None + + self._healthy_state_healthy = None + + self._var_symbol_col = None + self._var_ensembl_col = None + + self.class_maps = {"0": {}} + self._unknown_celltype_identifiers = self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers @abc.abstractmethod def _load(self, fn): pass + def _download(self, fn): + pass + + @property + def _directory_formatted_doi(self) -> str: + return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) + + @property + def _directory_formatted_id(self) -> str: + return "_".join("_".join(self.id.split("/")).split(".")) + + def clear(self): + """ + Remove loaded .adata to reduce memory footprint. + + :return: + """ + import gc + self.adata = None + gc.collect() + + def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None) -> bool: + """ + Only relevant for DatasetBaseGroupLoading but has to be a method of this class + because it is used in DatasetGroup. 
+ + :param fn: + :param adata_group: + :return: Whether group loading is used. + """ + return False + + def _load_cached( + self, + fn: str, + load_raw: bool, + allow_caching: bool, + ): + """ + Wraps data set specific load and allows for caching. + + Cache is written into director named after doi and h5ad named after data set id. + + :param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. + :return: + """ + if fn is None and self.path is None: + raise ValueError("provide either fn in load or path in constructor") + + assert self.cache_path is not None, "set self.cache_path first" + assert self._directory_formatted_doi is not None, "set self.doi first" + assert self._directory_formatted_id is not None, "set self.id first" + fn_cache = os.path.join( + self.cache_path, + self._directory_formatted_doi, + self._directory_formatted_id + ".h5ad" + ) + # Check if raw loader has to be called: + if load_raw or not os.path.exists(fn_cache): + self._load(fn=fn) + else: + assert self.cache_path is not None, "set cache_path to use caching" + assert os.path.exists(fn_cache), f"did not find cache file {fn_cache}, consider caching first" + self.adata = anndata.read_h5ad(fn_cache) + # Check if file needs to be cached: + if allow_caching and not os.path.exists(fn_cache): + assert self.cache_path is not None, "set cache_path to use caching" + dir_cache = os.path.dirname(fn_cache) + if not os.path.exists(dir_cache): + os.makedirs(dir_cache) + self.adata.write_h5ad(fn_cache) + def load( self, celltype_version: Union[str, None] = None, fn: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, ): """ @@ -85,50 +239,124 @@ def load( data sets are superimposed. :param match_to_reference: Reference genomes name. 
:param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. :return: """ - self._load_raw = load_raw - if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" "while not removing gene versions. this can lead to very poor matching performance") - # set default genomes if none provided + # Set default genomes per organism if none provided: if match_to_reference: genome = match_to_reference - self._set_genome(genome=genome) - elif self.species == "human": + elif self.organism == "human": genome = "Homo_sapiens_GRCh38_97" warnings.warn(f"using default genomes {genome}") - self._set_genome(genome=genome) - elif self.species == "mouse": + elif self.organism == "mouse": genome = "Mus_musculus_GRCm38_97" warnings.warn(f"using default genomes {genome}") - self._set_genome(genome=genome) + else: + raise ValueError(f"genome was not supplied and organism {self.organism} " + f"was not matched to a default choice") + self._set_genome(genome=genome) + + # Run data set-specific loading script: + self._load_cached(fn=fn, load_raw=load_raw, allow_caching=allow_caching) + # Set data-specific meta data in .adata: + self._set_metadata_in_adata(celltype_version=celltype_version) + # Set loading hyper-parameter-specific meta data: + self.adata.uns[self._ADATA_IDS_SFAIRA.load_raw] = load_raw + self.adata.uns[self._ADATA_IDS_SFAIRA.mapped_features] = match_to_reference + self.adata.uns[self._ADATA_IDS_SFAIRA.remove_gene_version] = remove_gene_version + # Streamline feature space: + self._convert_and_set_var_names() + self._collapse_gene_versions(remove_gene_version=remove_gene_version) + self._match_features_to_reference(match_to_reference=match_to_reference) + + def _convert_and_set_var_names( + self, + symbol_col: str = None, + ensembl_col: str = None, + ): + # Use defaults defined in 
data loader if none given to this function. + if symbol_col is None: + symbol_col = self.var_symbol_col + if ensembl_col is None: + ensembl_col = self.var_ensembl_col + if not ensembl_col and not symbol_col: + raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' + 'the name of the var column containing gene symbols') + # Process given gene names: Full gene names ("symbol") or ENSEMBL IDs ("ensembl"). + # Below the .var column that contain the target IDs are renamed to follow streamlined naming. + # If the IDs were contained in the index, a new column is added to .var. + if symbol_col: + if symbol_col == 'index': + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = self.adata.var.index.values.tolist() + else: + assert symbol_col in self.adata.var.columns, f"symbol_col {symbol_col} not found in .var" + self.adata.var = self.adata.var.rename( + {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, + axis='columns' + ) + if ensembl_col: + if ensembl_col == 'index': + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = self.adata.var.index.values.tolist() + else: + assert ensembl_col in self.adata.var.columns, f"ensembl_col {ensembl_col} not found in .var" + self.adata.var = self.adata.var.rename( + {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, + axis='columns' + ) + # If only symbol or ensembl was supplied, the other one is inferred ia a genome mapping dictionary. 
+ if not ensembl_col: + id_dict = self.genome_container.names_to_id_dict + id_strip_dict = self.genome_container.strippednames_to_id_dict + # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, + # match it straight away, if it is not in there we try to match everything in front of the first period in + # the gene name with a dictionary that was modified in the same way, if there is still no match we append na + ensids = [] + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: + if n in id_dict.keys(): + ensids.append(id_dict[n]) + elif n.split(".")[0] in id_strip_dict.keys(): + ensids.append(id_strip_dict[n.split(".")[0]]) + else: + ensids.append('n/a') + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids - self._load(fn=fn) + if not symbol_col: + id_dict = self.genome_container.id_to_names_dict + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ + id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] + ] - if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None + # Lastly, the index of .var is set to ensembl IDs. 
+ try: # debugging + self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() + except KeyError as e: + raise KeyError(e) - # Map cell type names from raw IDs to ontology maintained ones:: - if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, - celltype_version=celltype_version - ) + self.adata.var_names_make_unique() - # Remove version tag on ensembl gene ID so that different versions are superimposed downstream: + def _collapse_gene_versions(self, remove_gene_version): + """ + Remove version tag on ensembl gene ID so that different versions are superimposed downstream. + + :param remove_gene_version: + :return: + """ if remove_gene_version: new_index = [x.split(".")[0] for x in self.adata.var_names.tolist()] # Collapse if necessary: new_index_collapsed = list(np.unique(new_index)) if len(new_index_collapsed) < self.adata.n_vars: - raise ValueError("duplicate features detected after removing gene versions." - "the code to collapse these features is implemented but not tested.") + print("WARNING: duplicate features detected after removing gene versions." + "the code to collapse these features is implemented but not tested.") idx_map = np.array([new_index_collapsed.index(x) for x in new_index]) # Need reverse sorting to find index of last element in sorted list to split array using list index(). - idx_map_sorted_rev = np.argsort(idx_map)[::-1] + idx_map_sorted_fwd = np.argsort(idx_map) + idx_map_sorted_rev = idx_map_sorted_fwd[::-1].tolist() n_genes = len(idx_map_sorted_rev) # 1. Sort array in non-reversed order: idx_map_sorted_rev[::-1] # 2. 
Split into chunks based on blocks of identical entries in idx_map, using the occurrence of the @@ -138,10 +366,10 @@ def load( counts = np.concatenate([ np.sum(x, axis=1, keepdims=True) for x in np.split( - self.adata[:, idx_map_sorted_rev[::-1]].X, # forward ordered data + self.adata[:, idx_map_sorted_fwd].X, # forward ordered data indices_or_sections=[ n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order - for x in np.arange(0, len(new_index_collapsed)-1) # -1: do not need end of last partition + for x in np.arange(0, len(new_index_collapsed) - 1) # -1: do not need end of last partition ], axis=1 ) @@ -161,7 +389,13 @@ def load( self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values - # Match feature space to a genomes provided with sfaira + def _match_features_to_reference(self, match_to_reference): + """ + Match feature space to a genomes provided with sfaira + + :param match_to_reference: + :return: + """ if match_to_reference: # Convert data matrix to csc matrix if isinstance(self.adata.X, np.ndarray): @@ -198,100 +432,80 @@ def load( x_new = x_new.tocsr() self.adata = anndata.AnnData( - X=x_new, - obs=self.adata.obs, - obsm=self.adata.obsm, - var=pd.DataFrame(data={'names': self.genome_container.names, - self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, - index=self.genome_container.ensembl), - uns=self.adata.uns + X=x_new, + obs=self.adata.obs, + obsm=self.adata.obsm, + var=pd.DataFrame(data={'names': self.genome_container.names, + self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, + index=self.genome_container.ensembl), + uns=self.adata.uns ) - self.adata.uns['mapped_features'] = match_to_reference - - def _convert_and_set_var_names( - self, - symbol_col: str = None, - ensembl_col: str = None, - ): - if symbol_col and ensembl_col: - if symbol_col == 'index': - 
self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - - if ensembl_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) - - elif symbol_col: - id_dict = self.genome_container.names_to_id_dict - id_strip_dict = self.genome_container.strippednames_to_id_dict - if symbol_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) + def _set_metadata_in_adata(self, celltype_version): + """ + Copy meta data from dataset class in .anndata. 
- # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, - # match it straight away, if it is not in there we try to match everything in front of the first period in - # the gene name with a dictionary that was modified in the same way, if there is still no match we append na - ensids = [] - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: - if n in id_dict.keys(): - ensids.append(id_dict[n]) - elif n.split(".")[0] in id_strip_dict.keys(): - ensids.append(id_strip_dict[n.split(".")[0]]) + :param celltype_version: + :return: + """ + # Set data set-wide attributes (.uns): + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = self.author + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = self.doi + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download + self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta] = self.download_meta + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = self.normalization + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = self.year + + # Set cell-wise or data set-wide attributes (.uns / .obs): + # These are saved in .uns if they are data set wide to save memory. 
+ for x, y, z in ( + [self.age, self._ADATA_IDS_SFAIRA.age, self.obs_key_age], + [self.dev_stage, self._ADATA_IDS_SFAIRA.dev_stage, self.obs_key_dev_stage], + [self.ethnicity, self._ADATA_IDS_SFAIRA.ethnicity, self.obs_key_ethnicity], + [self.healthy, self._ADATA_IDS_SFAIRA.healthy, self.obs_key_healthy], + [self.organ, self._ADATA_IDS_SFAIRA.organ, self.obs_key_organ], + [self.protocol, self._ADATA_IDS_SFAIRA.protocol, self.obs_key_protocol], + [self.sex, self._ADATA_IDS_SFAIRA.sex, self.obs_key_sex], + [self.organism, self._ADATA_IDS_SFAIRA.organism, self.obs_key_organism], + [self.state_exact, self._ADATA_IDS_SFAIRA.state_exact, self.obs_key_state_exact], + ): + if x is None and z is None: + self.adata.uns[y] = None + elif x is not None and z is not None: + raise ValueError(f"attribute {y} of data set {self.id} was set both for full data set and per cell, " + f"only set one of the two or neither.") + elif x is not None and z is None: + # Attribute supplied per data set: Write into .uns. + self.adata.uns[y] = x + elif x is None and z is not None: + # Attribute supplied per cell: Write into .obs. + # Search for direct match of the sought-after column name or for attribute specific obs key. + if z not in self.adata.obs.keys(): + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. 
+ self.adata.uns[y] = None + print(f"WARNING: attribute {y} of data set {self.id} was not found in column {z}") # debugging else: - ensids.append('n/a') - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids - - elif ensembl_col: - id_dict = self.genome_container.id_to_names_dict - if ensembl_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) + # Include flag in .uns that this attribute is in .obs: + self.adata.uns[y] = UNS_STRING_META_IN_OBS + # Remove potential pd.Categorical formatting: + self.adata.obs[y] = self.adata.obs[z].values.tolist() else: - self.adata.var = self.adata.var.rename( - {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ - id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] - ] - - else: - raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' - 'the name of the var column containing gene symbols') - - self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() - self.adata.var_names_make_unique() + assert False, "switch option should not occur" + # Set cell-wise attributes (.obs): + # None so far other than celltypes. 
+ # Set cell types: + if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None + # Map cell type names from raw IDs to ontology maintained ones:: + if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, + celltype_version=celltype_version + ) def subset_organs(self, subset: Union[None, List]): if self.organ == "mixed": @@ -302,8 +516,16 @@ def subset_organs(self, subset: Union[None, List]): warnings.warn("You are trying to subset organs after loading the dataset." "This will have no effect unless the dataset is loaded again.") - def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndarray, fn: Union[None, str] = None, - celltype_version: Union[str, None] = None): + def load_tobacked( + self, + adata_backed: anndata.AnnData, + genome: str, + idx: np.ndarray, + fn: Union[None, str] = None, + celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True + ): """ Loads data set into slice of backed anndata object. @@ -317,13 +539,17 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar :param keys: :param fn: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param load_raw: See .load(). + :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. 
""" self.load( fn=fn, celltype_version=celltype_version, remove_gene_version=True, - match_to_reference=genome + match_to_reference=genome, + load_raw=load_raw, + allow_caching=allow_caching ) # Check if writing to sparse or dense matrix: if isinstance(adata_backed.X, np.ndarray) or \ @@ -346,7 +572,7 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar adata_backed.obs.loc[np.sort(idx), k] = [self.adata.uns[k] for i in range(len(idx))] else: # Need to fill this instead of throwing an exception as this condition can trigger for one element - # within a loop over multiple data sets (ie in data set groups). + # within a loop over multiple data sets (ie in data set human). adata_backed.obs.loc[idx, k] = ["key_not_found" for i in range(len(idx))] elif isinstance(adata_backed.X, anndata._core.sparse_dataset.SparseDataset): # backed sparse # cannot scatter update on backed sparse yet! assert that updated block is meant to be appended: @@ -361,40 +587,36 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar pandas.DataFrame(dict([ (k, [self.id for i in range(len(idx))]) if k == self._ADATA_IDS_SFAIRA.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns - else (k, [self.adata.uns[k] for i in range(len(idx))]) if k in list(self.adata.uns.keys()) - else (k, ["key_not_found" for i in range(len(idx))]) + else (k, [self.adata.uns[k] for _ in range(len(idx))]) if k in list(self.adata.uns.keys()) + else (k, ["key_not_found" for _ in range(len(idx))]) for k in adata_backed.obs.columns ])) ) + self.clear() else: - raise ValueError(f"Did not reccognize backed AnnData.X format {type(adata_backed.X)}") + raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def set_unkown_class_id(self, ids: list): + def set_unkown_class_id(self, ids: List[str]): """ - Sets list of custom identifiers of unknown cell types in adata.obs["cell_ontology_class"] to the target 
one. + Sets list of custom identifiers of unknown cell types data annotation. - :param ids: IDs in adata.obs["cell_ontology_class"] to replace. + :param ids: IDs in cell type name column to replace by "unknown identifier. :return: """ - target_id = "unknown" - ontology_classes = [ - x if x not in ids else target_id - for x in self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].tolist() - ] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ontology_classes + self._unknown_celltype_identifiers.extend( + [x for x in ids if x not in self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers] + ) - def _set_genome(self, - genome: str - ): + def _set_genome(self, genome: str): if genome.lower().startswith("homo_sapiens"): g = SuperGenomeContainer( - species="human", + organism="human", genome=genome ) elif genome.lower().startswith("mus_musculus"): g = SuperGenomeContainer( - species="mouse", + organism="mouse", genome=genome ) else: @@ -436,7 +658,6 @@ def map_ontology_class( """ :param raw_ids: - :param class_maps: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. 
:return: """ @@ -444,7 +665,8 @@ def map_ontology_class( celltype_version = self.set_default_type_version() self.assert_celltype_version_key(celltype_version=celltype_version) return [ - self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() else x + self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() + else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x for x in raw_ids ] @@ -467,7 +689,7 @@ def meta_fn(self): else: return os.path.join(self.meta_path, self.doi_cleaned_id + "_meta.csv") - def load_meta(self, fn: Union[PathLike, str]): + def load_meta(self, fn: Union[PathLike, str, None]): if fn is None: if self.meta_fn is None: raise ValueError("provide either fn in load or path in constructor") @@ -475,49 +697,130 @@ def load_meta(self, fn: Union[PathLike, str]): else: if isinstance(fn, str): fn = os.path.normpath(fn) - self.meta = pandas.read_csv(fn, usecols=self.META_DATA_FIELDS) + # Only load meta data if file exists: + if os.path.isfile(fn): + meta = pandas.read_csv( + fn, usecols=list(self._META_DATA_FIELDS.keys()), dtype=str, + ) + # Formatting: All are read as string to allow dealing wth None entries: + # Make sure bool entries are bool: + for k, v in self._META_DATA_FIELDS.items(): + if v == bool: + meta[k] = [ + True if x == "True" else + False if x == "False" else None + for x in meta[k].values.tolist() + ] + else: + # Make sure None entries are formatted as None and not as string "None": + meta[k] = [None if x == "None" else x for x in meta[k].values.tolist()] + self.meta = meta def write_meta( self, fn_meta: Union[None, str] = None, - fn_data: Union[None, str] = None, dir_out: Union[None, str] = None, + fn_data: Union[None, str] = None, ): - if fn_meta is None: - if self.path is None and dir_out is None: - raise ValueError("provide either fn in load or path in constructor") - if dir_out is None: - dir_out = self.meta_path + """ + 
Write meta data object for data set. + + Does not cache data and attempts to load raw data. + + :param fn_meta: File to write to, selects automatically based on self.meta_path and self.id otherwise. + :param dir_out: Path to write to, file name is selected automatically based on self.id. + :param fn_data: See .load() + :return: + """ + if fn_meta is not None and dir_out is not None: + raise ValueError("supply either fn_meta or dir_out but not both") + elif fn_meta is None and dir_out is None: + if self.meta_fn is None: + raise ValueError("provide either fn in load or via constructor (meta_path)") fn_meta = self.meta_fn + elif fn_meta is None and dir_out is not None: + fn_meta = os.path.join(dir_out, self.doi_cleaned_id + "_meta.csv") + elif fn_meta is not None and dir_out is None: + pass # fn_meta is used + else: + assert False, "bug in switch" + if self.adata is None: - self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) + self.load( + fn=fn_data, + remove_gene_version=False, + match_to_reference=None, + load_raw=True, + allow_caching=False, + ) + # Add data-set wise meta data into table: meta = pandas.DataFrame({ self._ADATA_IDS_SFAIRA.annotated: self.adata.uns[self._ADATA_IDS_SFAIRA.annotated], self._ADATA_IDS_SFAIRA.author: self.adata.uns[self._ADATA_IDS_SFAIRA.author], self._ADATA_IDS_SFAIRA.doi: self.adata.uns[self._ADATA_IDS_SFAIRA.doi], self._ADATA_IDS_SFAIRA.download: self.adata.uns[self._ADATA_IDS_SFAIRA.download], + self._ADATA_IDS_SFAIRA.download_meta: self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta], self._ADATA_IDS_SFAIRA.id: self.adata.uns[self._ADATA_IDS_SFAIRA.id], self._ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, - self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] if self._ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - self._ADATA_IDS_SFAIRA.organ: self.adata.uns[self._ADATA_IDS_SFAIRA.organ], - self._ADATA_IDS_SFAIRA.protocol: 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol], - self._ADATA_IDS_SFAIRA.species: self.adata.uns[self._ADATA_IDS_SFAIRA.species], + self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization], self._ADATA_IDS_SFAIRA.year: self.adata.uns[self._ADATA_IDS_SFAIRA.year], }, index=range(1)) + # Expand table by variably cell-wise or data set-wise meta data: + for x in [ + self._ADATA_IDS_SFAIRA.age, + self._ADATA_IDS_SFAIRA.dev_stage, + self._ADATA_IDS_SFAIRA.ethnicity, + self._ADATA_IDS_SFAIRA.healthy, + self._ADATA_IDS_SFAIRA.organ, + self._ADATA_IDS_SFAIRA.protocol, + self._ADATA_IDS_SFAIRA.sex, + self._ADATA_IDS_SFAIRA.organism, + self._ADATA_IDS_SFAIRA.state_exact, + ]: + if self.adata.uns[x] == UNS_STRING_META_IN_OBS: + meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) + else: + meta[x] = self.adata.uns[x] + # Add cell types into table if available: + if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.keys(): + meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = str(( + np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values)), + )) + else: + meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = " " meta.to_csv(fn_meta) + # Properties: + @property - def annotated(self) -> bool: - if self._annotated is not None: - return self._annotated + def age(self) -> Union[None, str]: + if self._age is not None: + return self._age else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.annotated] + if self.meta is not None and self._ADATA_IDS_SFAIRA.age in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.age] + else: + return None - @annotated.setter - def annotated(self, x: bool): - self._annotated = x + @age.setter + def age(self, x: str): + self.__erasing_protection(attr="age", val_old=self._age, val_new=x) + self.__value_protection(attr="age", allowed=self._ADATA_IDS_SFAIRA.age_allowed_entries, attempted=x) + self._age = x + + @property + def 
annotated(self) -> bool: + if self.obs_key_cellontology_id is not None or self.obs_key_cellontology_original is not None: + return True + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.annotated in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.annotated] + else: + return None @property def author(self) -> str: @@ -526,12 +829,33 @@ def author(self) -> str: else: if self.meta is None: self.load_meta(fn=None) + if self.meta is None or self._ADATA_IDS_SFAIRA.author not in self.meta.columns: + raise ValueError("author must be set but was neither set in constructor nor in meta data") return self.meta[self._ADATA_IDS_SFAIRA.author] @author.setter def author(self, x: str): + self.__erasing_protection(attr="author", val_old=self._author, val_new=x) self._author = x + @property + def dev_stage(self) -> Union[None, str]: + if self._dev_stage is not None: + return self._dev_stage + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.dev_stage in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.dev_stage] + else: + return None + + @dev_stage.setter + def dev_stage(self, x: str): + self.__erasing_protection(attr="dev_stage", val_old=self._dev_stage, val_new=x) + self.__value_protection(attr="dev_stage", allowed=self._ADATA_IDS_SFAIRA.dev_stage_allowed_entries, attempted=x) + self._dev_stage = x + @property def doi(self) -> str: if self._doi is not None: @@ -539,24 +863,121 @@ def doi(self) -> str: else: if self.meta is None: self.load_meta(fn=None) + if self.meta is None or self._ADATA_IDS_SFAIRA.healthy not in self.meta.columns: + raise ValueError("doi must be set but was neither set in constructor nor in meta data") return self.meta[self._ADATA_IDS_SFAIRA.doi] @doi.setter def doi(self, x: str): + self.__erasing_protection(attr="doi", val_old=self._doi, val_new=x) self._doi = x @property - def download(self) -> str: + def 
download(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + """ + Data download website(s). + + Save as tuple with single element, which is a list of all download websites relevant to dataset. + :return: + """ if self._download is not None: - return self._download + x = self._download else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.download] + x = self.meta[self._ADATA_IDS_SFAIRA.download] + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + return x @download.setter - def download(self, x: str): - self._download = x + def download(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download", val_old=self._download, val_new=x) + # Formats to tuple with single element, which is a list of all download websites relevant to dataset, + # which can be used as a single element column in a pandas data frame. + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + self._download = (x,) + + @property + def download_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + """ + Meta data download website(s). + + Save as tuple with single element, which is a list of all download websites relevant to dataset. 
+ :return: + """ + x = self._download_meta + # if self._download_meta is not None: # TODO add this back in once download_meta is routineyl set in datasets + # x = self._download_meta + # else: + # if self.meta is None: + # self.load_meta(fn=None) + # x = self.meta[self._ADATA_IDS_SFAIRA.download_meta] + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + return x + + @download_meta.setter + def download_meta(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download_meta", val_old=self._download_meta, val_new=x) + # Formats to tuple with single element, which is a list of all download websites relevant to dataset, + # which can be used as a single element column in a pandas data frame. + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + self._download_meta = (x,) + + @property + def ethnicity(self) -> Union[None, str]: + if self._ethnicity is not None: + return self._ethnicity + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.ethnicity in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.ethnicity] + else: + return None + + @ethnicity.setter + def ethnicity(self, x: str): + self.__erasing_protection(attr="ethnicity", val_old=self._ethnicity, val_new=x) + self.__value_protection(attr="ethnicity", allowed=self._ADATA_IDS_SFAIRA.ethnicity_allowed_entries, attempted=x) + self._ethnicity = x + + @property + def healthy(self) -> Union[None, bool]: + if self._healthy is not None: + return self._healthy + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.healthy in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.healthy] + else: + return None + + @healthy.setter + def healthy(self, x: bool): + self.__erasing_protection(attr="healthy", val_old=self._healthy, val_new=x) + self._healthy = x + + @property + 
def healthy_state_healthy(self) -> str: + return self._healthy_state_healthy + + @healthy_state_healthy.setter + def healthy_state_healthy(self, x: str): + self.__erasing_protection(attr="healthy_state_healthy", val_old=self._healthy_state_healthy, val_new=x) + self._healthy_state_healthy = x @property def id(self) -> str: @@ -569,8 +990,29 @@ def id(self) -> str: @id.setter def id(self, x: str): + self.__erasing_protection(attr="id", val_old=self._id, val_new=x) self._id = x + @property + def meta(self) -> Union[None, pd.DataFrame]: + return self._meta + + @meta.setter + def meta(self, x: Union[None, pd.DataFrame]): + # Make sure formatting is correct: + if x is not None: + for k, v in x.items(): + v = v.tolist() # avoid numpy data types + if k not in self._META_DATA_FIELDS.keys(): + raise ValueError(f"did not find {k} in format look up table") + else: + if x[k] is not None: # None is always allowed. + if not isinstance(v[0], self._META_DATA_FIELDS[k]): + raise ValueError(f"key {k} of signature {str(v[0])} " + f"in meta data table did not match signature " + f"{str(self._META_DATA_FIELDS[k])}") + self._meta = x + @property def ncells(self) -> int: if self.adata is not None: @@ -584,78 +1026,369 @@ def ncells(self) -> int: return int(x) @property - def normalization(self) -> str: + def normalization(self) -> Union[None, str]: if self._normalization is not None: return self._normalization else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.normalization] + if self.meta is not None and self._ADATA_IDS_SFAIRA.normalization in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.normalization] + else: + return None @normalization.setter def normalization(self, x: str): + self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) + self.__value_protection(attr="normalization", allowed=self._ADATA_IDS_SFAIRA.normalization_allowed_entries, + attempted=x) self._normalization = x @property - def 
organ(self) -> str: + def obs_key_age(self) -> str: + return self._obs_key_age + + @obs_key_age.setter + def obs_key_age(self, x: str): + self.__erasing_protection(attr="obs_key_age", val_old=self._obs_key_age, val_new=x) + self._obs_key_age = x + + @property + def obs_key_cellontology_id(self) -> str: + return self._obs_key_cellontology_id + + @obs_key_cellontology_id.setter + def obs_key_cellontology_id(self, x: str): + self.__erasing_protection(attr="obs_key_cellontology_id", val_old=self._obs_key_cellontology_id, val_new=x) + self._obs_key_cellontology_id = x + + @property + def obs_key_cellontology_original(self) -> str: + return self._obs_key_cellontology_original + + @obs_key_cellontology_original.setter + def obs_key_cellontology_original(self, x: str): + self.__erasing_protection(attr="obs_key_cellontology_original", val_old=self._obs_key_cellontology_original, + val_new=x) + self._obs_key_cellontology_original = x + + @property + def obs_key_dev_stage(self) -> str: + return self._obs_key_dev_stage + + @obs_key_dev_stage.setter + def obs_key_dev_stage(self, x: str): + self.__erasing_protection(attr="obs_key_dev_stage", val_old=self._obs_key_dev_stage, val_new=x) + self._obs_key_dev_stage = x + + @property + def obs_key_ethnicity(self) -> str: + return self._obs_key_ethnicity + + @obs_key_ethnicity.setter + def obs_key_ethnicity(self, x: str): + self.__erasing_protection(attr="obs_key_ethnicity", val_old=self._obs_key_ethnicity, val_new=x) + self._obs_key_ethnicity = x + + @property + def obs_key_healthy(self) -> str: + return self._obs_key_healthy + + @obs_key_healthy.setter + def obs_key_healthy(self, x: str): + self.__erasing_protection(attr="obs_key_healthy", val_old=self._obs_key_healthy, val_new=x) + self._obs_key_healthy = x + + @property + def obs_key_organ(self) -> str: + return self._obs_key_organ + + @obs_key_organ.setter + def obs_key_organ(self, x: str): + self.__erasing_protection(attr="obs_key_organ", val_old=self._obs_key_organ, val_new=x) + 
self._obs_key_organ = x + + @property + def obs_key_organism(self) -> str: + return self._obs_key_organism + + @obs_key_organism.setter + def obs_key_organism(self, x: str): + self.__erasing_protection(attr="obs_key_organism", val_old=self._obs_key_organism, val_new=x) + self._obs_key_organism = x + + @property + def obs_key_protocol(self) -> str: + return self._obs_key_protocol + + @obs_key_protocol.setter + def obs_key_protocol(self, x: str): + self.__erasing_protection(attr="obs_key_protocol", val_old=self._obs_key_protocol, val_new=x) + self._obs_key_protocol = x + + @property + def obs_key_sex(self) -> str: + return self._obs_key_sex + + @obs_key_sex.setter + def obs_key_sex(self, x: str): + self.__erasing_protection(attr="obs_key_sex", val_old=self._obs_key_sex, val_new=x) + self._obs_key_sex = x + + @property + def obs_key_state_exact(self) -> str: + return self._obs_key_state_exact + + @obs_key_state_exact.setter + def obs_key_state_exact(self, x: str): + self.__erasing_protection(attr="obs_key_state_exact", val_old=self._obs_key_state_exact, val_new=x) + self._obs_key_state_exact = x + + @property + def organ(self) -> Union[None, str]: if self._organ is not None: return self._organ else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.organ] + if self.meta is not None and self._ADATA_IDS_SFAIRA.organ in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.organ] + else: + return None @organ.setter def organ(self, x: str): + self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) + self.__value_protection(attr="organ", allowed=self._ADATA_IDS_SFAIRA.organ_allowed_entries, attempted=x) self._organ = x @property - def protocol(self) -> str: + def organism(self) -> Union[None, str]: + if self._organism is not None: + return self._organism + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.organism in self.meta.columns: + return 
self.meta[self._ADATA_IDS_SFAIRA.organism] + else: + return None + + @organism.setter + def organism(self, x: str): + self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) + self.__value_protection(attr="organism", allowed=self._ADATA_IDS_SFAIRA.organism_allowed_entries, attempted=x) + self._organism = x + + @property + def protocol(self) -> Union[None, str]: if self._protocol is not None: return self._protocol else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.protocol] + if self.meta is not None and self._ADATA_IDS_SFAIRA.protocol in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.protocol] + else: + return None @protocol.setter def protocol(self, x: str): + self.__erasing_protection(attr="protocol", val_old=self._protocol, val_new=x) + self.__value_protection(attr="protocol", allowed=self._ADATA_IDS_SFAIRA.protocol_allowed_entries, attempted=x) self._protocol = x @property - def species(self) -> str: - if self._species is not None: - return self._species + def sex(self) -> Union[None, str]: + if self._sex is not None: + return self._sex + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.sex in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.sex] + else: + return None + + @sex.setter + def sex(self, x: str): + self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) + self.__value_protection(attr="sex", allowed=self._ADATA_IDS_SFAIRA.sex_allowed_entries, attempted=x) + self._sex = x + + @property + def source(self) -> str: + return self._source + + @source.setter + def source(self, x: Union[str, None]): + self.__erasing_protection(attr="source", val_old=self._source, val_new=x) + self._source = x + + @property + def state_exact(self) -> Union[None, str]: + if self._state_exact is not None: + return self._state_exact else: if self.meta is None: self.load_meta(fn=None) - return 
self.meta[self._ADATA_IDS_SFAIRA.species] + if self.meta is not None and self._ADATA_IDS_SFAIRA.state_exact in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.state_exact] + else: + return None + + @state_exact.setter + def state_exact(self, x: str): + self.__erasing_protection(attr="state_exact", val_old=self._state_exact, val_new=x) + self._state_exact = x + + @property + def var_ensembl_col(self) -> str: + return self._var_ensembl_col + + @var_ensembl_col.setter + def var_ensembl_col(self, x: str): + self.__erasing_protection(attr="var_ensembl_col", val_old=self._var_ensembl_col, val_new=x) + self._var_ensembl_col = x + + @property + def var_symbol_col(self) -> str: + return self._var_symbol_col - @species.setter - def species(self, x: str): - self._species = x + @var_symbol_col.setter + def var_symbol_col(self, x: str): + self.__erasing_protection(attr="var_symbol_col", val_old=self._var_symbol_col, val_new=x) + self._var_symbol_col = x @property - def year(self) -> str: + def year(self) -> Union[None, int]: if self._year is not None: return self._year else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.year] + if self.meta is not None and self._ADATA_IDS_SFAIRA.year in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.year] + else: + return None @year.setter - def year(self, x: str): + def year(self, x: int): + self.__erasing_protection(attr="year", val_old=self._year, val_new=x) + self.__value_protection(attr="year", allowed=self._ADATA_IDS_SFAIRA.year_allowed_entries, attempted=x) self._year = x + # Private methods: + + def __erasing_protection(self, attr, val_old, val_new): + """ + This is called when a erasing protected attribute is set to check whether it was set before. + + :param attr: Attribute to be set. + :param val_old: Old value for attribute to be set. + :param val_new: New value for attribute to be set. 
+ """ + if val_old is not None: + raise ValueError(f"attempted to set erasing protected attribute {attr}: " + f"previously was {str(val_old)}, attempted to set {str(val_new)}") + + def __value_protection(self, attr, allowed, attempted): + """ + Check whether value is from set of allowed values. + + Does not check if allowed is None. + + :param attr: + :param allowed: + :param attempted: + :return: + """ + if allowed is not None: + if not isinstance(attempted, list) and not isinstance(attempted, tuple): + attempted = [attempted] + for x in attempted: + if x not in allowed: + raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + + +class DatasetBaseGroupLoading(DatasetBase): + """ + Container class specific to datasets which come in groups and require specialised loading. + """ + _unprocessed_full_group_object: bool + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self._unprocessed_full_group_object = False + + @abc.abstractmethod + def _load_full_group_object(self, fn=None) -> Union[None, anndata.AnnData]: + """ + Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. + + Override this method in the Dataset if this is relevant. 
+ :return: adata_group + """ + pass + + def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None): + if self.adata is None and adata_group is not None: + self.adata = adata_group + elif self.adata is None and adata_group is not None: + self.adata = self._load_full_group_object(fn=fn) + elif self.adata is not None and self._unprocessed_full_group_object: + pass + else: + assert False, "switch error" + self._unprocessed_full_group_object = True + return True + + def _load_from_group(self): + """ + Sets .adata based on a raw anndata object that correponds to a superset of the data belonging to this Dataset, + including subsetting. + + Override this method in the Dataset if this is relevant. + """ + pass + + def _subset_from_group( + self, + subset_items: dict, + ): + """ + Subsets a raw anndata object to the data corresponding to this Dataset. + + :param subset_items: Key-value pairs for subsetting: Keys are columns in .obs, values are entries that should + be kept. If the dictionary has multiple entries, these are sequentially subsetted (AND-gate). + :return: + """ + assert self.adata is not None, "this method should only be called if .adata is not None" + for k, v in subset_items: + self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] -class DatasetGroupBase(abc.ABC): + def _load(self, fn): + _ = self.set_raw_full_group_object(fn=fn, adata_group=None) + if self._unprocessed_full_group_object: + self._load_from_group() + self._unprocessed_full_group_object = False + + +class DatasetGroup: """ + Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through + wrapping them. 
Example: - #query human lung - #from sfaira.dev.data.human.lung import DatasetGroupLung as DatasetGroup + #query loaders lung + #from sfaira.dev.data.loaders.lung import DatasetGroupLung as DatasetGroup #dsg_humanlung = DatasetGroupHuman(path='path/to/data') #dsg_humanlung.load_all(match_to_reference='Homo_sapiens_GRCh38_97') #dsg_humanlung[some_id] @@ -663,62 +1396,116 @@ class DatasetGroupBase(abc.ABC): """ datasets: Dict - def __init__(self): + def __init__(self, datasets: dict): + self.datasets = datasets self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - def subset_organs(self, subset: Union[None, List]): - for i in self.ids: - if self.datasets[i].organ == "mixed": - self.datasets[i].subset_organs(subset) - else: - raise ValueError("Only data that contain multiple organs can be subset.") + def _load_group(self, load_raw: bool): + """ - def load_all( + :param load_raw: See .load(). + :return: + """ + return None + + def load( self, - celltype_version: Union[str, None] = None, annotated_only: bool = False, + celltype_version: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, + func=None, + kwargs_func: Union[None, dict] = None, ): """ + Load all datasets in group (option for temporary loading). - Subsets self.datasets to the data sets that were found. + Note: This method automatically subsets to the group to the data sets for which input files were found. + + This method also allows temporarily loading data sets to execute function on loaded data sets (supply func). + In this setting, datasets are removed from memory after the function has been executed. - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param annotated_only: - :param remove_gene_version: - :param match_to_reference: - :param load_raw: Loads unprocessed version of data if available in data loader. 
+ :param celltype_version: See .load(). + :param remove_gene_version: See .load(). + :param match_to_reference: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset + instance. The return can be empty: + + def func(dataset, **kwargs_func): + # code manipulating dataset and generating output x. + return x + :param kwargs_func: Kwargs of func. :return: """ - for x in self.ids: - try: - if self.datasets[x].annotated or not annotated_only: - self.datasets[x].load( - celltype_version=self.format_type_version(celltype_version), - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw - ) - except FileNotFoundError as e: - print(e) - del self.datasets[x] + formatted_version = self.format_type_version(celltype_version) + args = [ + formatted_version, + remove_gene_version, + match_to_reference, + load_raw, + allow_caching, + func, + kwargs_func + ] - def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: List[np.ndarray], - annotated_only: bool = False, celltype_version: Union[str, None] = None): + if processes > 1 and len(self.datasets.items()) > 1: # multiprocessing parallelisation + print(f"using python multiprocessing (processes={processes}), " + f"for easier debugging revert to sequential execution (processes=1)") + with multiprocessing.Pool(processes=processes) as pool: + res = pool.starmap(map_fn, [ + (tuple([v] + args),) + for k, v in self.datasets.items() if v.annotated or not annotated_only + ]) + # Clear data sets that were not successfully loaded because of missing data: + for x in res: + if x is not None: + print(x[1]) + del self.datasets[x[0]] + else: # for loop + adata_group = None + for k, v in self.datasets.items(): + print(f"loading {k}") 
+ group_loading = v.set_raw_full_group_object(fn=None, adata_group=adata_group) + if adata_group is None and group_loading: # cache full adata object for subsequent Datasets + adata_group = v.adata.copy() + x = map_fn(tuple([v] + args)) + # Clear data sets that were not successfully loaded because of missing data: + if x is not None: + print(x[1]) + del self.datasets[x[0]] + del adata_group + + def load_tobacked( + self, + adata_backed: anndata.AnnData, + genome: str, + idx: List[np.ndarray], + annotated_only: bool = False, + celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True, + ): """ Loads data set group into slice of backed anndata object. - Subsets self.datasets to the data sets that were found. + Subsets self.datasets to the data sets that were found. Note that feature space is automatically formatted as + this is necessary for concatenation. - :param adata_backed: + :param adata_backed: Anndata instance to load into. :param genome: Genome container target genomes loaded. :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a shuffled object. This has to be a list of the length of self.data, one index array for each dataset. - :param keys: :param annotated_only: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param celltype_version: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. 
""" i = 0 @@ -727,8 +1514,13 @@ def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: Lis try: if self.datasets[x].annotated or not annotated_only: self.datasets[x].load_tobacked( - adata_backed=adata_backed, genome=genome, idx=idx[i], - celltype_version=self.format_type_version(celltype_version)) + adata_backed=adata_backed, + genome=genome, + idx=idx[i], + celltype_version=self.format_type_version(celltype_version), + load_raw=load_raw, + allow_caching=allow_caching + ) i += 1 except FileNotFoundError: del self.datasets[x] @@ -756,7 +1548,6 @@ def adata(self): adata.obs[self._ADATA_IDS_SFAIRA.author] = adata.uns[self._ADATA_IDS_SFAIRA.author] adata.obs[self._ADATA_IDS_SFAIRA.year] = adata.uns[self._ADATA_IDS_SFAIRA.year] adata.obs[self._ADATA_IDS_SFAIRA.protocol] = adata.uns[self._ADATA_IDS_SFAIRA.protocol] - adata.obs[self._ADATA_IDS_SFAIRA.subtissue] = adata.uns[self._ADATA_IDS_SFAIRA.subtissue] if self._ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): adata.obs[self._ADATA_IDS_SFAIRA.normalization] = adata.uns[self._ADATA_IDS_SFAIRA.normalization] if self._ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: @@ -774,11 +1565,10 @@ def adata(self): self._ADATA_IDS_SFAIRA.author, self._ADATA_IDS_SFAIRA.year, self._ADATA_IDS_SFAIRA.protocol, - self._ADATA_IDS_SFAIRA.subtissue, self._ADATA_IDS_SFAIRA.normalization, self._ADATA_IDS_SFAIRA.dev_stage, self._ADATA_IDS_SFAIRA.annotated, - "mapped_features" + self._ADATA_IDS_SFAIRA.mapped_features, ] for k in list(adata.uns.keys()): if k not in keys_to_keep: @@ -811,10 +1601,11 @@ def adata(self): adata_concat.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index - if len(set([a.uns['mapped_features'] for a in adata_ls])) == 1: - adata_concat.uns['mapped_features'] = adata_ls[0].uns['mapped_features'] + if len(set([a.uns[self._ADATA_IDS_SFAIRA.mapped_features] for a in adata_ls])) == 1: + adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = \ + 
adata_ls[0].uns[self._ADATA_IDS_SFAIRA.mapped_features] else: - adata_concat.uns['mapped_features'] = False + adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = False else: adata_concat = adata_ls[0] adata_concat.obs[self._ADATA_IDS_SFAIRA.dataset] = self.ids[0] @@ -851,7 +1642,7 @@ def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: cells.append(self.datasets[x].ncells) except FileNotFoundError: del self.datasets[x] - return cells + return np.asarray(cells) def ncells(self, annotated_only: bool = False): cells = self.ncells_bydataset(annotated_only=annotated_only) @@ -886,7 +1677,7 @@ def format_type_version(self, version): versions = np.array(list(versions)) return versions[np.argmax([int(x) for x in versions])] else: - self.assert_celltype_version_key() + self.assert_celltype_version_key(celltype_version=version) return version def subset(self, key, values): @@ -915,36 +1706,111 @@ def subset(self, key, values): for x in ids_del: del self.datasets[x] + def subset_organs(self, subset: Union[None, List]): + for i in self.ids: + if self.datasets[i].organ == "mixed": + self.datasets[i].subset_organs(subset) + else: + raise ValueError("Only data that contain multiple organs can be subset.") + + +class DatasetGroupDirectoryOriented(DatasetGroup): + + def __init__( + self, + file_base: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Automatically collects Datasets from python files in directory. + + Uses a pre-built DatasetGroup if this is defined in a group.py file, otherwise, the DatasetGroup is initialised + here. + + :param file_base: + :param path: + :param meta_path: + :param cache_path: + """ + # Collect all data loaders from files in directory: + datasets = [] + cwd = os.path.dirname(file_base) + dataset_module = str(cwd.split("/")[-1]) + if "group.py" in os.listdir(cwd): + DatasetGroupFound = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." 
+ dataset_module + ".group.DatasetGroup") + dsg = DatasetGroupFound(path=path, meta_path=meta_path, cache_path=cache_path) + datasets.extend(list(dsg.datasets.values)) + else: + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + DatasetFound = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." + dataset_module + "." + file_module + ".Dataset") + datasets.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + class DatasetSuperGroup: """ Container for multiple DatasetGroup instances. - Can be used to grid_searches models across organs. Supports backed anndata objects. + Used to manipulate structured dataset collections. Primarly designed for this manipulation, convert to DatasetGroup + via flatten() for more functionalities. 
""" adata: Union[None, anndata.AnnData] fn_backed: Union[None, PathLike] - dataset_groups: List[DatasetGroupBase] + dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]] - def __init__(self, dataset_groups: Union[None, List[DatasetGroupBase]]): + def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetSuperGroup]]): self.adata = None self.fn_backed = None self.set_dataset_groups(dataset_groups=dataset_groups) self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + def set_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups = dataset_groups + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups = dataset_groups_proc + else: + assert False + + def extend_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups.extend(dataset_groups) + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups.extend(dataset_groups_proc) + else: + assert False + def get_gc( self, genome: str = None ): if genome.lower().startswith("homo_sapiens"): g = SuperGenomeContainer( - species="human", + organism="human", genome=genome ) elif genome.lower().startswith("mus_musculus"): g = SuperGenomeContainer( - species="mouse", + organism="mouse", genome=genome ) else: @@ -968,40 +1834,52 @@ def ncells_bydataset_flat(self, annotated_only: bool = False): def ncells(self, annotated_only: bool = False): return np.sum(self.ncells_bydataset(annotated_only=annotated_only)) - def set_dataset_groups(self, dataset_groups: List[DatasetGroupBase]): - 
self.dataset_groups = dataset_groups + def flatten(self) -> DatasetGroup: + """ + Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. - def subset_organs(self, subset: Union[None, List]): + :return: + """ + ds = {} for x in self.dataset_groups: - if x.datasets[0].organ == "mixed": - x.subset_organs(subset) + for k, v in x.datasets.items(): + assert k not in ds.keys(), f"{k} was duplicated in super group, purge duplicates before flattening" + ds[k] = v + return DatasetGroup(datasets=ds) def load_all( self, celltype_version: Union[str, None] = None, + annotated_only: bool = False, match_to_reference: Union[str, None] = None, remove_gene_version: bool = True, - annotated_only: bool = False, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, ): """ - Loads data set groups into anndata object. + Loads data set human into anndata object. :param celltype_version: Version of cell type ontology to use. Uses most recent within each DatasetGroup if None. - :param match_to_reference: - :param remove_gene_version: :param annotated_only: - :param load_raw: + :param match_to_reference: See .load(). + :param remove_gene_version: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + Note: parallelises loading of each dataset group, but not across groups. 
:return: """ for x in self.dataset_groups: - x.load_all( + x.load( annotated_only=annotated_only, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, celltype_version=celltype_version, - load_raw=load_raw + load_raw=load_raw, + allow_caching=allow_caching, + processes=processes, ) # making sure that concatenate is not used on a None adata object resulting from organ filtering for i in range(len(self.dataset_groups)): @@ -1021,9 +1899,11 @@ def load_all_tobacked( as_dense: bool = False, annotated_only: bool = False, celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True, ): """ - Loads data set groups into backed anndata object. + Loads data set human into backed anndata object. Example usage: @@ -1039,9 +1919,11 @@ def load_all_tobacked( :param fn_backed: File name to save backed anndata to temporarily. :param genome: ID of target genomes. :param shuffled: Whether to shuffle data when writing to backed. - :param as_dense: + :param as_dense: Whether to load into dense count matrix. :param annotated_only: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param load_raw: See .load(). + :param allow_caching: See .load(). 
""" if shuffled and not as_dense: raise ValueError("cannot write backed shuffled and sparse") @@ -1075,7 +1957,6 @@ def load_all_tobacked( self._ADATA_IDS_SFAIRA.organ, self._ADATA_IDS_SFAIRA.protocol, self._ADATA_IDS_SFAIRA.state_exact, - self._ADATA_IDS_SFAIRA.subtissue, self._ADATA_IDS_SFAIRA.year, ] if scatter_update: @@ -1097,23 +1978,30 @@ def load_all_tobacked( for x in ncells: temp_ls = [] for y in x: - temp_ls.append(idx_vector[row:(row+y)]) + temp_ls.append(idx_vector[row:(row + y)]) row += y idx_ls.append(temp_ls) print("checking expected and received data set sizes, rerun meta data generation if mismatch is found:") print(self.ncells_bydataset(annotated_only=annotated_only)) print([[len(x) for x in xx] for xx in idx_ls]) for i, x in enumerate(self.dataset_groups): - x.load_all_tobacked(adata_backed=self.adata, genome=genome, idx=idx_ls[i], annotated_only=annotated_only, - celltype_version=celltype_version) + x.load_tobacked( + adata_backed=self.adata, + genome=genome, + idx=idx_ls[i], + annotated_only=annotated_only, + celltype_version=celltype_version, + load_raw=load_raw, + allow_caching=allow_caching, + ) # If the sparse non-shuffled approach is used, make sure that self.adata.obs.index is unique() before saving if not scatter_update: self.adata.obs.index = pd.RangeIndex(0, len(self.adata.obs.index)) # Explicitly write backed file to disk again to make sure that obs are included and that n_obs is set correctly self.adata.write() # Saving obs separately below is therefore no longer required (hence commented out) - #fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" - #self.adata.obs.to_csv(fn_backed_obs) + # fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" + # self.adata.obs.to_csv(fn_backed_obs) def delete_backed(self): del self.adata @@ -1136,3 +2024,8 @@ def subset(self, key, values): """ for x in self.dataset_groups: x.subset(key=key, values=values) + + def subset_organs(self, subset: Union[None, List]): + 
for x in self.dataset_groups: + if x.datasets[0].organ == "mixed": + x.subset_organs(subset) diff --git a/sfaira/data/databases/__init__.py b/sfaira/data/databases/__init__.py deleted file mode 100644 index 89402624a..000000000 --- a/sfaira/data/databases/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.data.databases.cellxgene import DatasetCellxgene, DatasetGroupCellxgene diff --git a/sfaira/data/databases/cellxgene/__init__.py b/sfaira/data/databases/cellxgene/__init__.py deleted file mode 100644 index ac116b424..000000000 --- a/sfaira/data/databases/cellxgene/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data.databases.cellxgene.cellxgene_loader import DatasetCellxgene -from sfaira.data.databases.cellxgene.cellxgene_group import DatasetGroupCellxgene \ No newline at end of file diff --git a/sfaira/data/databases/cellxgene/cellxgene_group.py b/sfaira/data/databases/cellxgene/cellxgene_group.py deleted file mode 100644 index 4fd786d51..000000000 --- a/sfaira/data/databases/cellxgene/cellxgene_group.py +++ /dev/null @@ -1,30 +0,0 @@ -import pandas as pd -import os -from typing import Union - -from .external import DatasetGroupBase - -from .cellxgene_loader import DatasetCellxgene - - -class DatasetGroupCellxgene(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - fn_ls = os.listdir(path) - fn_ls = [x for x in fn_ls if x in self.accepted_file_names] - datasets = [ - DatasetCellxgene(path=path, fn=x, meta_path=meta_path) - for x in fn_ls - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - - @property - def accepted_file_names(self): - return [ - "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad" - ] diff --git a/sfaira/data/databases/cellxgene/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py deleted file mode 100644 index ba9bd42df..000000000 --- a/sfaira/data/databases/cellxgene/cellxgene_loader.py +++ /dev/null @@ 
class DatasetCellxgene(DatasetBase):
    """
    Data loader for a single h5ad file downloaded from cellxgene.

    Meta data stored under the cellxgene keys (ADATA_IDS_CELLXGENE) is copied to the corresponding sfaira keys
    (ADATA_IDS_SFAIRA) when the file is loaded.

    :param path: Directory that is joined with fn to locate the file.
    :param fn: Name of the h5ad file.
    :param meta_path: Forwarded unchanged to DatasetBase.
    :param kwargs: Forwarded unchanged to DatasetBase.
    """

    def __init__(
            self,
            path: Union[str, None],
            fn: str,
            meta_path: Union[str, None] = None,
            **kwargs
    ):
        DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs)
        self.fn = fn
        self.class_maps = {
            "0": {},
        }

    def _load(self, fn=None):
        """
        Read the h5ad file and translate the cellxgene meta data into sfaira fields.

        :param fn: Ignored, the file is always located via self.path and self.fn.
        :raises Warning: If the file contains more than one assay or more than one organism.
        """
        fn = os.path.join(self.path, self.fn)
        adata = anndata.read(fn)
        # Expose the raw matrix as X; the normalization is declared as 'raw' further down.
        adata.X = adata.raw.X
        # All fields below are written to self.adata, so the loaded object has to be bound to it first.
        # NOTE(review): assumes DatasetBase does not populate self.adata before _load() is called - confirm.
        self.adata = adata

        self.adata.uns[ADATA_IDS_SFAIRA.author] = \
            adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names]
        self.adata.uns[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_CELLXGENE.year]
        self.adata.uns[ADATA_IDS_SFAIRA.doi] = adata.uns[ADATA_IDS_CELLXGENE.doi]
        # The assay check has to inspect the assay (protocol) column, not the species column.
        if len(np.unique(adata.obs[ADATA_IDS_CELLXGENE.protocol].values)) > 1:
            raise Warning("found multiple assay in data set %s" % self.fn)
        self.adata.uns[ADATA_IDS_SFAIRA.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0]
        # The organ is taken from the 4th "_"-separated token of the file name instead of the tissue column
        # in .obs, because blood is handled as a separate tissue in .obs.
        self.adata.uns[ADATA_IDS_SFAIRA.organ] = str(self.fn).split("_")[3]
        # Check the same (cellxgene) column that the species value is read from.
        if len(np.unique(adata.obs[ADATA_IDS_CELLXGENE.species].values)) > 1:
            raise Warning("found multiple organisms in data set %s" % self.fn)
        self.adata.uns[ADATA_IDS_SFAIRA.species] = adata.obs[ADATA_IDS_CELLXGENE.species].values[0]
        self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id
        self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download
        self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.annotated
        self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw'

        self.adata.obs[ADATA_IDS_SFAIRA.subtissue] = adata.obs[ADATA_IDS_CELLXGENE.subtissue].values
        self.adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values
        self.adata.obs[ADATA_IDS_SFAIRA.sex] = adata.obs[ADATA_IDS_CELLXGENE.sex].values
        self.adata.obs[ADATA_IDS_SFAIRA.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values
        # A cell counts as healthy if its disease annotation equals the cellxgene label for the healthy state.
        self.adata.obs[ADATA_IDS_SFAIRA.healthy] = \
            adata.obs[ADATA_IDS_CELLXGENE.disease].values == ADATA_IDS_CELLXGENE.disease_state_healthy
        self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = adata.obs[ADATA_IDS_CELLXGENE.disease].values

        self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = \
            adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_id].values.tolist()
        self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = \
            adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_class].values.tolist()
        self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = \
            adata.obs[ADATA_IDS_CELLXGENE.cell_types_original].values.tolist()

        self._convert_and_set_var_names(
            symbol_col=ADATA_IDS_CELLXGENE.gene_id_names,
            ensembl_col=ADATA_IDS_CELLXGENE.gene_id_ensembl,
            new_index=ADATA_IDS_CELLXGENE.gene_id_ensembl
        )
import loaders +from .super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/__init__.py b/sfaira/data/dataloaders/anatomical_groups/__init__.py new file mode 100644 index 000000000..739fab642 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/__init__.py @@ -0,0 +1,2 @@ +from . import human +from . import mouse diff --git a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py new file mode 100644 index 000000000..c4dfd5b7c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py @@ -0,0 +1,44 @@ +from .human_adipose import DatasetGroupAdipose +from .human_adrenalgland import DatasetGroupAdrenalgland +from .human_mixed import DatasetGroupMixed +from .human_artery import DatasetGroupArtery +from .human_bladder import DatasetGroupBladder +from .human_blood import DatasetGroupBlood +from .human_bone import DatasetGroupBone +from .human_brain import DatasetGroupBrain +from .human_calvaria import DatasetGroupCalvaria +from .human_cervix import DatasetGroupCervix +from .human_chorionicvillus import DatasetGroupChorionicvillus +from .human_colon import DatasetGroupColon +from .human_duodenum import DatasetGroupDuodenum +from .human_epityphlon import DatasetGroupEpityphlon +from .human_esophagus import DatasetGroupEsophagus +from .human_eye import DatasetGroupEye +from .human_fallopiantube import DatasetGroupFallopiantube +from .human_femalegonad import DatasetGroupFemalegonad +from .human_gallbladder import DatasetGroupGallbladder +from .human_heart import DatasetGroupHeart +from .human_hesc import DatasetGroupHesc +from .human_ileum import DatasetGroupIleum +from .human_jejunum import DatasetGroupJejunum +from .human_kidney import DatasetGroupKidney +from .human_liver import DatasetGroupLiver +from .human_lung import DatasetGroupLung +from .human_malegonad import DatasetGroupMalegonad +from .human_muscle import 
class DatasetGroupAdipose(DatasetGroup):
    """
    Dataset group bundling the human adipose data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001,)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupAdipose as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupArtery(DatasetGroup):
    """
    Dataset group bundling the human artery data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001,)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupArtery as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupBladder(DatasetGroup):
    """
    Dataset group bundling the human bladder data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001, Dataset0002, Dataset0003)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupBladder as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupBlood(DatasetGroup):
    """
    Dataset group bundling the human blood data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (
            Dataset0001,
            Dataset0002,
            Dataset0003,
            Dataset0004,
            Dataset0005,
            Dataset0006,
            Dataset0007,
            Dataset0008,
            Dataset0009,
        )
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupBlood as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupBone(DatasetGroup):
    """
    Dataset group bundling the human bone data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001, Dataset0002, Dataset0003)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupBone as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupBrain(DatasetGroup):
    """
    Dataset group bundling the human brain data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (
            Dataset0001,
            Dataset0002,
            Dataset0003,
            Dataset0004,
            Dataset0005,
            Dataset0006,
            Dataset0007,
        )
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupBrain as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupCalvaria(DatasetGroup):
    """
    Dataset group bundling the human calvaria data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001,)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupCalvaria as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupChorionicvillus(DatasetGroup):
    """
    Dataset group bundling the human chorionic villus data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001,)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupChorionicvillus as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupColon(DatasetGroup):
    """
    Dataset group bundling the human colon data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (
            Dataset0001,
            Dataset0002,
            Dataset0003,
            Dataset0004,
            Dataset0005,
            Dataset0006,
            Dataset0007,
            Dataset0008,
        )
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupColon as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupDuodenum(DatasetGroup):
    """
    Dataset group bundling the human duodenum data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001,)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupDuodenum as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupEsophagus(DatasetGroup):
    """
    Dataset group bundling the human esophagus data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001, Dataset0002, Dataset0003)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupEsophagus as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupEye(DatasetGroup):
    """
    Dataset group bundling the human eye data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001, Dataset0002, Dataset0003, Dataset0004)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupEye as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
class DatasetGroupFemalegonad(DatasetGroup):
    """
    Dataset group bundling the human female gonad data loaders, keyed by dataset ID.

    :param path: Forwarded unchanged to every dataset loader of this group.
    :param meta_path: Forwarded unchanged to every dataset loader of this group.
    :param cache_path: Forwarded unchanged to every dataset loader of this group.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None
    ):
        loader_kwargs = {"path": path, "meta_path": meta_path, "cache_path": cache_path}
        loader_classes = (Dataset0001, Dataset0002)
        loaders = [loader_cls(**loader_kwargs) for loader_cls in loader_classes]
        super().__init__(datasets={loader.id: loader for loader in loaders})
        # Optional add-on: merge in the loaders of the matching sfaira_extension group if it can be imported.
        try:
            from sfaira_extension.data.human import DatasetGroupFemalegonad as ExtensionGroup
            extension_datasets = ExtensionGroup(**loader_kwargs).datasets
            self.datasets.update(extension_datasets)
        except ImportError:
            pass
import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_gallbladder_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupGallbladder(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupGallbladder + self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py new file mode 100644 index 000000000..2425637cb --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -0,0 +1,32 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_004 import Dataset as Dataset0004 + + +class DatasetGroupHeart(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, 
meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py new file mode 100644 index 000000000..c32bd0730 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_hesc_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupHesc(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupHesc + self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py new file mode 100644 index 000000000..93d9ad6b0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2019_08_008.human_ileum_2019_10x_martin_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_ileum_2019_10x_wang_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ileum_2020_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupIleum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py new file mode 100644 index 000000000..d9a7fb8e6 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_jejunum_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupJejunum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions 
from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupJejunum + self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py new file mode 100644 index 000000000..2a6f1bd94 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -0,0 +1,44 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_10861_2.human_kidney_2019_10xSn_lake_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1126_science_aat5031.human_kidney_2019_10x_stewart_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41597_019_0351_8.human_kidney_2020_10x_liao_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_002 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_004 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_005 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_006 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_007 import Dataset as Dataset0010 + + +class DatasetGroupKidney(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + 
meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py new file mode 100644 index 000000000..4674aedc0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -0,0 +1,42 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_018_06318_7.human_liver_2018_10x_macparland_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1652_y.human_liver_2019_10x_popescu_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1631_3.human_liver_2019_10x_ramachandran_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1373_2.human_liver_2019_mCELSeq2_aizarani_001 import 
Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_001 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_002 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_003 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_004 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_005 import Dataset as Dataset0009 + + +class DatasetGroupLiver(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py new file mode 
100644 index 000000000..2d312fd3e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -0,0 +1,54 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_lung_2019_10x_madissoon_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_dropseq_braga_003 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1101_753806.human_lung_2020_10x_habermann_001 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_001 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_002 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1016_j_devcel_2020_01_033.human_lung_2020_10x_miller_001 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_10x_travaglini_001 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_001 import Dataset as Dataset0010 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_002 import Dataset as Dataset0011 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_003 import Dataset as Dataset0012 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_004 import Dataset as Dataset0013 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_005 import Dataset as Dataset0014 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_smartseq2_travaglini_002 import Dataset as Dataset0015 + + +class DatasetGroupLung(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0012(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0013(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0014(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0015(path=path, meta_path=meta_path, cache_path=cache_path), + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupLung + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py new file mode 100644 index 000000000..0b607e309 --- /dev/null +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41422_018_0099_2.human_malegonad_2018_10x_guo_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupMalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py b/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py new file mode 100644 index 000000000..018f0c413 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_12464_3.human_mixed_2019_10x_szabo_001 import Dataset as Dataset0001 + + +class DatasetGroupMixed(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + 
Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMixed + self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py new file mode 100644 index 000000000..6ca10dad9 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupMuscle(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py new file mode 100644 index 000000000..1af19a624 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -0,0 
+1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupOmentum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupOmentum + self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py new file mode 100644 index 000000000..08b067dd7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -0,0 +1,38 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cels_2016_08_011.human_pancreas_2016_indrop_baron_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2016_08_020.human_pancreas_2016_smartseq2_segerstolpe_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2017_09_004.human_pancreas_2017_smartseq2_enge_001 import Dataset as 
Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_002 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_004 import Dataset as Dataset0007 + + +class DatasetGroupPancreas(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py new file mode 100644 index 000000000..106b9cd20 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_smartseq2_ventotormo_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_10x_ventotormo_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_placenta_2020_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupPlacenta(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py new file mode 100644 index 000000000..61a2f6be7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pleura_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupPleura(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, 
datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPleura + self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py new file mode 100644 index 000000000..3ed1f9a44 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_celrep_2018_11_086.human_prostate_2018_10x_henry_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_prostate_2020_microwell_han_001 import Dataset as Dataset0002 + + +class DatasetGroupProstate(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py new file mode 100644 index 000000000..67ee06c82 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_rectum_2019_10x_wang_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rectum_2020_microwell_han_001 import Dataset as Dataset0002 + + +class DatasetGroupRectum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupRectum + self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py new file mode 100644 index 000000000..a39e0646a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupRib(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if 
available: + try: + from sfaira_extension.data.human import DatasetGroupRib + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py new file mode 100644 index 000000000..30985fe65 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupSkin(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py new file mode 100644 index 000000000..4434146c8 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spinalcord_2020_microwell_han_001 import Dataset as 
Dataset0001 + + +class DatasetGroupSpinalcord(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupSpinalcord + self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py new file mode 100644 index 000000000..fa36e2bf7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_spleen_2019_10x_madissoon_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupSpleen(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import 
DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py new file mode 100644 index 000000000..b6030d318 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -0,0 +1,44 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_004 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_005 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_006 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_007 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_008 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_009 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_010 import Dataset as Dataset0010 + + +class DatasetGroupStomach(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: 
Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py new file mode 100644 index 000000000..9ece40261 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1126_science_aay3224.human_thymus_2020_10x_park_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupThymus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, 
None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py new file mode 100644 index 000000000..e521b4f37 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupThyroid(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupThyroid + self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py new file mode 100644 index 000000000..1fb26ad18 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_trachea_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupTrachea(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py new file mode 100644 index 000000000..143f88545 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ureter_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupUreter(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupUreter + 
self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py new file mode 100644 index 000000000..303e1ed50 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_uterus_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupUterus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py b/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py new file mode 100644 index 000000000..8d8c1569d --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py @@ -0,0 +1,27 @@ +from .mouse_bladder import DatasetGroupBladder +from .mouse_brain import DatasetGroupBrain +from .mouse_diaphragm import DatasetGroupDiaphragm +from .mouse_adipose import DatasetGroupAdipose +from .mouse_heart import DatasetGroupHeart +from .mouse_kidney import DatasetGroupKidney +from .mouse_colon import DatasetGroupColon +from .mouse_muscle import DatasetGroupMuscle +from .mouse_liver import DatasetGroupLiver +from .mouse_lung import DatasetGroupLung +from .mouse_mammarygland 
import DatasetGroupMammaryGland +from .mouse_bone import DatasetGroupBone +from .mouse_femalegonad import DatasetGroupFemalegonad +from .mouse_pancreas import DatasetGroupPancreas +from .mouse_placenta import DatasetGroupPlacenta +from .mouse_blood import DatasetGroupBlood +from .mouse_prostate import DatasetGroupProstate +from .mouse_rib import DatasetGroupRib +from .mouse_ileum import DatasetGroupIleum +from .mouse_skin import DatasetGroupSkin +from .mouse_spleen import DatasetGroupSpleen +from .mouse_stomach import DatasetGroupStomach +from .mouse_malegonad import DatasetGroupMalegonad +from .mouse_thymus import DatasetGroupThymus +from .mouse_tongue import DatasetGroupTongue +from .mouse_trachea import DatasetGroupTrachea +from .mouse_uterus import DatasetGroupUterus diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/external.py b/sfaira/data/dataloaders/anatomical_groups/mouse/external.py new file mode 100644 index 000000000..413092483 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/external.py @@ -0,0 +1,2 @@ +from sfaira.data import DatasetBase, DatasetGroup +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py new file mode 100644 index 000000000..f7532d03c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_003 import Dataset 
as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 + + +class DatasetGroupAdipose(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py new file mode 100644 index 000000000..6576b2987 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bladder_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupBladder(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + 
Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py new file mode 100644 index 000000000..35638cefa --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 + + +class DatasetGroupBlood (DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + 
Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py new file mode 100644 index 000000000..f2135c456 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bone_2018_microwell_001 import Dataset as Dataset0003 + + +class DatasetGroupBone(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBone + self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py new file mode 100644 index 000000000..5b932749e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupBrain(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py new file mode 100644 index 000000000..6507cd3fd --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -0,0 +1,29 @@ +import os +from typing import 
Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupColon(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupColon + self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py new file mode 100644 index 000000000..7c69001d0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -0,0 +1,27 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_diaphragm_2019_smartseq2_pisco_001 import Dataset as Dataset0001 + + +class DatasetGroupDiaphragm(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupDiaphragm + 
self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py new file mode 100644 index 000000000..b2d687412 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupFemalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py new file mode 100644 index 000000000..50458cd02 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -0,0 +1,32 @@ +import os +from typing import Union + +from .external import DatasetGroup + + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_10x_pisco_001 import Dataset as Dataset0001 +from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_002 import Dataset as Dataset0003 + + +class DatasetGroupHeart(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py new file mode 100644 index 000000000..bcd9fd9ca --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupIleum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, 
cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py new file mode 100644 index 000000000..ce6788cd6 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupKidney(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + 
try: + from sfaira_extension.data.mouse import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py new file mode 100644 index 000000000..e9915b36b --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupLiver(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py new file mode 100644 index 000000000..b6e6c9e5c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_003 import Dataset as Dataset0005 + + +class DatasetGroupLung(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupLung + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py new file mode 100644 index 000000000..8e62116b5 --- 
/dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupMalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py new file mode 100644 index 000000000..e5bd9eb2e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -0,0 +1,37 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 + + +class DatasetGroupMammaryGland(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMammaryGland + self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py new file mode 100644 index 000000000..6e1deee58 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 
+from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupMuscle(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py new file mode 100644 index 000000000..a70918270 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -0,0 +1,47 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_pancreas_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_002 import Dataset as Dataset0005 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_004 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_005 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_006 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_007 import Dataset as Dataset0010 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_008 import Dataset as Dataset0011 + + +class DatasetGroupPancreas(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupPancreas + 
self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py new file mode 100644 index 000000000..04a87566d --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupPlacenta(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py new file mode 100644 index 000000000..a816076de --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_001 import Dataset as Dataset0001 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupProstate(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py new file mode 100644 index 000000000..069b179df --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupRib(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in 
datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupRib + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py new file mode 100644 index 000000000..d0a012add --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupSkin(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py new file mode 100644 index 000000000..8bd62249a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_spleen_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupSpleen(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py new file mode 100644 index 000000000..15b2fcd63 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -0,0 +1,27 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_stomach_2018_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupStomach(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load 
versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py new file mode 100644 index 000000000..a2f30d3df --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_thymus_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupThymus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py new file mode 100644 index 000000000..695f666de --- /dev/null +++ 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupTongue(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupTongue + self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py new file mode 100644 index 000000000..eaff5c910 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupTrachea(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, 
meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py new file mode 100644 index 000000000..7513bf339 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupUterus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/databases/__init__.py b/sfaira/data/dataloaders/databases/__init__.py new file mode 100644 index 000000000..328e18fc8 --- /dev/null +++ b/sfaira/data/dataloaders/databases/__init__.py @@ -0,0 +1 @@ +from .super_group import DatasetSuperGroupDatabases diff 
--git a/sfaira/data/dataloaders/databases/cellxgene/__init__.py b/sfaira/data/dataloaders/databases/cellxgene/__init__.py new file mode 100644 index 000000000..2d6a4a900 --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/__init__.py @@ -0,0 +1,2 @@ +from sfaira.data.dataloaders.databases.cellxgene.cellxgene_group import DatasetGroup +from sfaira.data.dataloaders.databases.cellxgene.cellxgene_loader import Dataset diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py new file mode 100644 index 000000000..41328d40c --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -0,0 +1,26 @@ +import os +from typing import Union + +from .external import ADATA_IDS_CELLXGENE, DatasetGroup + +from .cellxgene_loader import Dataset + + +class DatasetGroup(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() + + fn_ls = os.listdir(path) + fn_ls = [x for x in fn_ls if x in self._ADATA_IDS_CELLXGENE.accepted_file_names] + datasets = [ + Dataset(path=path, fn=x, meta_path=meta_path, cache_path=cache_path) + for x in fn_ls + ] + keys = [x.id for x in datasets] + super().__init__(dict(zip(keys, datasets))) diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py new file mode 100644 index 000000000..48429f7ea --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -0,0 +1,70 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase +from .external import ADATA_IDS_CELLXGENE + + +class Dataset(DatasetBase): + """ + This is a dataloader for downloaded h5ad from cellxgene. 
+ + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None], + fn: str, + meta_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, **kwargs) + self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() + self.fn = fn + + self.obs_key_cellontology_class = self._ADATA_IDS_CELLXGENE.cell_ontology_class + self.obs_key_cellontology_id = self._ADATA_IDS_CELLXGENE.cell_ontology_id + self.obs_key_cellontology_original = self._ADATA_IDS_CELLXGENE.cell_types_original + self.obs_key_dev_stage = self._ADATA_IDS_CELLXGENE.dev_stage + self.obs_key_ethnicity = self._ADATA_IDS_CELLXGENE.ethnicity + self.obs_key_healthy = self._ADATA_IDS_CELLXGENE.healthy + self.obs_key_sex = self._ADATA_IDS_CELLXGENE.sex + self.obs_key_organism = self._ADATA_IDS_CELLXGENE.organism + self.obs_key_state_exact = self._ADATA_IDS_CELLXGENE.state_exact + + self.healthy_state_healthy = self._ADATA_IDS_CELLXGENE.disease_state_healthy + + self.var_ensembl_col = self._ADATA_IDS_CELLXGENE.gene_id_ensembl + self.var_symbol_col = self._ADATA_IDS_CELLXGENE.gene_id_names + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + """ + Note that in contrast to data set specific data loaders, here, the core attributes are only identified from + the data in this function and are not already set in the constructor. These attributes can still be + used through meta data containers after the data was loaded once. + + :param fn: + :return: + """ + fn = os.path.join(self.path, self.fn) + adata = anndata.read(fn) + adata.X = adata.raw.X + # TODO delete raw? 
+ + self.author = adata.uns[self._ADATA_IDS_CELLXGENE.author][self._ADATA_IDS_CELLXGENE.author_names] + self.doi = adata.uns[self._ADATA_IDS_CELLXGENE.doi] + self.download = self.download + self.id = self.id + self.normalization = 'raw' + self.organ = str(self.fn).split("_")[3] # TODO interface this properly + # self.organ = adata.obs["tissue"].values[0] + self.organism = adata.obs[self._ADATA_IDS_CELLXGENE.organism].values[0] + self.protocol = adata.obs[self._ADATA_IDS_CELLXGENE.protocol].values[0] + self.year = adata.uns[self._ADATA_IDS_CELLXGENE.year] diff --git a/sfaira/data/dataloaders/databases/cellxgene/external.py b/sfaira/data/dataloaders/databases/cellxgene/external.py new file mode 100644 index 000000000..c7a6982b7 --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/external.py @@ -0,0 +1,3 @@ +from sfaira.data import DatasetBase, DatasetGroup +from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE +from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/data/dataloaders/databases/super_group.py b/sfaira/data/dataloaders/databases/super_group.py new file mode 100644 index 000000000..df0605579 --- /dev/null +++ b/sfaira/data/dataloaders/databases/super_group.py @@ -0,0 +1,22 @@ +from typing import Union + +from sfaira.data import DatasetSuperGroup +from sfaira.data.dataloaders.databases.cellxgene import DatasetGroup as DatasetGroupCellxgene + + +class DatasetSuperGroupDatabases(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + dataset_groups = [] + # List all data bases here: + dataset_groups.append(DatasetGroupCellxgene( + path=path, + meta_path=meta_path, + cache_path=cache_path + )) + super().__init__(dataset_groups=dataset_groups) diff --git a/sfaira/data/dataloaders/loaders/__init__.py b/sfaira/data/dataloaders/loaders/__init__.py new file mode 100644 index 000000000..cf0bdc722 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/__init__.py @@ -0,0 +1 @@ +from .super_group import DatasetSuperGroupLoaders diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py new file mode 100644 index 000000000..75444b20e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -0,0 +1,87 @@ +import anndata +import os +from typing import Union +import tarfile +import gzip +from io import StringIO +import anndata as ad +import pandas as pd +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" + self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" + + self.author = "Quake" + self.doi = "10.1016/j.cell.2017.09.004" + self.healthy = True + self.normalization = "raw" + self.protocol = "Smartseq2" + self.organ = "pancreas" # ToDo: "islet of Langerhans" + self.organism = "human" + self.state_exact = "healthy" + self.year = 2017 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = 
"celltype" + + self.class_maps = { + "0": { + "alpha": "Alpha cell", + "acinar": "Acinar cell", + "ductal": "Ductal cell", + "beta": "Beta cell", + "unsure": "Unknown", + "delta": "Delta cell", + "mesenchymal": "Mesenchymal Cell" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), + os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") + ] + dfs = [] + with tarfile.open(fn[0]) as tar: + for member in tar.getmembers(): + d = pd.read_csv(tar.extractfile(member), compression="gzip", header=None, sep="\t", index_col=0, + names=[member.name.split("_")[0]]) + dfs.append(d) + self.adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + with gzip.open(fn[1]) as f: + file_content = [i.decode("utf-8") for i in f.readlines()] + inputstring = "" + for line in file_content: + if "ID_REF" in line: + inputstring += line + if "!Sample_title" in line: + inputstring += line[1:] + if "!Sample_characteristics_ch1\t\"inferred_cell_type: alpha" in line: + inputstring += line[1:] + data = StringIO(inputstring) + d = pd.read_csv(data, sep="\t").T + d.columns = d.iloc[0] + d.drop("Sample_title", inplace=True) + d = d.reset_index().set_index("ID_REF") + d.columns.name = None + d.index.name = None + self.adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in self.adata.obs.index] + self.adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in self.adata.obs.index] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py new file mode 100644 index 000000000..e397eecf3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py @@ -0,0 +1,43 @@ +import anndata +import numpy as np +import pandas +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1016_j_cell_2018_02_001(DatasetBase): + """ + This is a dataloader template for mca data. + """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.obs_key_cellontology_class = "Annotation" + self.obs_key_cellontology_original = "Annotation" + + self.author = "Guo" + self.doi = "10.1016/j.cell.2018.02.001" + self.normalization = "raw" + self.healthy = True + self.organism = "mouse" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + + def _load_generalized(self, fn, fn_meta): + celltypes = pandas.read_csv(fn_meta, index_col=1) + celltypes = celltypes.drop(["Unnamed: 0"], axis=1) + + data = pandas.read_csv(fn, sep=" ", header=0) + self.adata = anndata.AnnData(data.T) + self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() + self.adata.obs = celltypes.loc[self.adata.obs_names, :] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py new file mode 100644 index 000000000..347120a7d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -0,0 +1,48 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + 
def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "bladder" + + self.class_maps = { + "0": { + "Endothelial cell_Ly6c1 high(Bladder)": "endothelial cell", + "Vascular endothelial cell(Bladder)": "endothelial cell", + "Urothelium(Bladder)": "bladder urothelial cell", + "Dendritic cell_Cd74 high(Bladder)": "dendritic cell", + "Dendritic cell_Lyz2 high(Bladder)": "dendritic cell", + "Macrophage_Pf4 high(Bladder)": "macrophage", + "NK cell(Bladder)": "NK cell", + "Basal epithelial cell(Bladder)": "basal epithelial cell", + "Epithelial cell_Upk3a high(Bladder)": "epithelial cell", + "Epithelial cell_Gm23935 high(Bladder)": "epithelial cell", + "Mesenchymal stromal cell(Bladder)": "mesenchymal stromal cell", + "Stromal cell_Dpt high(Bladder)": "stromal cell", + "Stromal cell_Car3 high(Bladder)": "stromal cell", + "Smooth muscle cell(Bladder)": "smooth muscle cell", + "Vascular smooth muscle progenitor cell(Bladder)": "smooth muscle cell", + "Umbrella cell(Bladder)": "umbrella cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py new file mode 100644 index 
000000000..4c4aa21e8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, 
fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py new file mode 100644 index 000000000..980024416 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 
high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py new file mode 100644 index 000000000..10069693f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps 
= { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py new file mode 100644 index 000000000..c0602d28a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py @@ -0,0 +1,53 @@ 
+import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, 
"mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py new file mode 100644 index 000000000..ae8f80e2f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 
high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py new file mode 100644 index 000000000..baa4cb60e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "bone" + + self.class_maps = { + "0": { + "B cell_Igkc high(Bone-Marrow)": "naive B cell", + "Dendritic cell_H2-Eb1 high(Bone-Marrow)": "dendritic cell", + "Dendritic cell_Siglech high(Bone-Marrow)": "dendritic 
cell", + "Macrophage_Ms4a6c high(Bone-Marrow)": "macrophage", + "Macrophage_S100a4 high(Bone-Marrow)": "macrophage", + "Erythroblast(Bone-Marrow)": "erythroid progenitor", + "Mast cell(Bone-Marrow)": "mast cell", + "Monocyte_Mif high(Bone-Marrow)": "monocyte", + "Monocyte_Prtn3 high(Bone-Marrow)": "monocyte", + "Neutrophil progenitor(Bone-Marrow)": "neutrophil progenitor", + "Neutrophil_Cebpe high(Bone-Marrow)": "neutrophil", + "Neutrophil_Fcnb high(Bone-Marrow)": "neutrophil", + "Neutrophil_Mmp8 high(Bone-Marrow)": "neutrophil", + "Neutrophil_Ngp high(Bone-Marrow)": "neutrophil", + "Hematopoietic stem progenitor cell(Bone-Marrow)": "hematopoietic precursor cell", + "Pre-pro B cell(Bone-Marrow)": "early pro-B cell", + "T cell_Ms4a4b high(Bone-Marrow)": "CD4-positive, alpha-beta T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py new file mode 100644 index 000000000..a96602c7c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "brain" + + self.class_maps = { + "0": { + "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial cell", + "Astrocyte_Atp1b2 high(Brain)": "astrocyte", + "Astrocyte_Mfe8 high(Brain)": "astrocyte", + "Astrocyte_Pla2g7 high(Brain)": "astrocyte", + "Granulocyte_Ngp high(Brain)": "granulocyte", + "Hypothalamic ependymal cell(Brain)": "ependymal cell", + "Macrophage_Klf2 high(Brain)": "macrophage", + "Macrophage_Lyz2 high(Brain)": "macrophage", + "Microglia(Brain)": "microglial cell", + "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", + "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", + "Neuron(Brain)": "neuron", + "Pan-GABAergic(Brain)": "GABAergic cell", + "Schwann cell(Brain)": "schwann cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py new file mode 100644 index 000000000..7d188840b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs 
+ ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "brain" + + self.class_maps = { + "0": { + "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial cell", + "Astrocyte_Atp1b2 high(Brain)": "astrocyte", + "Astrocyte_Mfe8 high(Brain)": "astrocyte", + "Astrocyte_Pla2g7 high(Brain)": "astrocyte", + "Granulocyte_Ngp high(Brain)": "granulocyte", + "Hypothalamic ependymal cell(Brain)": "ependymal cell", + "Macrophage_Klf2 high(Brain)": "macrophage", + "Macrophage_Lyz2 high(Brain)": "macrophage", + "Microglia(Brain)": "microglial cell", + "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", + "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", + "Neuron(Brain)": "neuron", + "Pan-GABAergic(Brain)": "GABAergic cell", + "Schwann cell(Brain)": "schwann cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py new file mode 100644 index 000000000..db4d0801e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + 
path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "femalegonad" + + self.class_maps = { + "0": { + "Cumulus cell_Car14 high(Ovary)": "cumulus cell", + "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", + "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", + "Granulosa cell_Inhba high(Ovary)": "granulosa cell", + "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", + "Large luteal cell(Ovary)": "large luteal cell", + "Macrophage_Lyz2 high(Ovary)": "macrophage", + "Marcrophage_Cd74 high(Ovary)": "macrophage", + "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", + "Ovarian vascular surface endothelium cell(Ovary)": "endothelial cell of ovarian surface", + "Small luteal cell(Ovary)": "small luteal cell", + "Stroma cell (Ovary)": "stromal cell", + "Thecal cell(Ovary)": "thecal cell", + "luteal cells(Ovary)": "luteal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py new file mode 100644 index 000000000..affa74f21 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "femalegonad" + + self.class_maps = { + "0": { + "Cumulus cell_Car14 high(Ovary)": "cumulus cell", + "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", + "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", + "Granulosa cell_Inhba high(Ovary)": "granulosa cell", + "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", + "Large luteal cell(Ovary)": "large luteal cell", + "Macrophage_Lyz2 high(Ovary)": "macrophage", + "Marcrophage_Cd74 high(Ovary)": "macrophage", + "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", + "Ovarian vascular surface endothelium cell(Ovary)": "endothelial cell of ovarian surface", + "Small luteal cell(Ovary)": "small luteal cell", + "Stroma cell (Ovary)": "stromal cell", + "Thecal cell(Ovary)": "thecal cell", + "luteal cells(Ovary)": "luteal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py new file mode 100644 index 000000000..2f817c510 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + 
"Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py new file mode 100644 index 000000000..61fb53c0e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 
high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py new file mode 100644 index 000000000..08303f3a1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S 
cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py new file mode 100644 index 000000000..365e62c50 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "kidney" + + self.class_maps = { + "0": { + "Cell in cell cycle(Fetal_Kidney)": "fetal proliferative cell", + "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell" + }, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py new file mode 100644 index 000000000..6b1aa65cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py @@ -0,0 +1,66 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "kidney" + + self.class_maps = { + "0": { + "Adipocyte(Fetal_Kidney)": "fetal adipocyte", + "B cell(Kidney)": "B cell", + "Dendritic cell_Ccr7 high(Kidney)": "dendritic cell", + "Dendritic cell_Cst3 high(Kidney)": "dendritic cell", + "Distal collecting duct principal cell_Cldn4 high(Kidney)": "kidney collecting duct principal cell", + "Distal collecting duct principal cell_Hsd11b2 high(Kidney)": "kidney collecting duct principal cell", + "Distal convoluted tubule_Pvalb high(Kidney)": "kidney distal convoluted tubule epithelial cell", + "Distal convoluted tubule_S100g high(Kidney)": "kidney distal convoluted tubule epithelial cell", + "Endothelial cell(Kidney)": 
"fenestrated cell", + "Epithelial cell_Cryab high(Kidney)": "epithelial cell", + "Fenestrated endothelial cell_Plvap high(Kidney)": "fenestrated cell", + "Fenestrated endothelial cell_Tm4sf1 high(Kidney)": "fenestrated cell", + "Glomerular epithelial cell_Aldh1a2 high(Fetal_Kidney)": "glomerular epithelial cell", + "Intercalated cells of collecting duct_Aqp6 high(Kidney)": "kidney collecting duct epithelial cell", + "Intercalated cells of collecting duct_Slc26a4 high(Kidney)": "kidney collecting duct epithelial cell", + "Macrophage_Ccl4 high (Kidney)": "macrophage", + "Macrophage_Lyz2 high(Kidney)": "macrophage", + "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell", + "Neutrophil progenitor_S100a8 high(Kidney)": "neutrophil progenitor", + "Proximal tubule brush border cell(Kidney)": "brush cell", + "Proximal tubule cell_Cyp4a14 high(Kidney)": "epithelial cell of proximal tubule", + "Proximal tubule cell_Osgin1 high(Kidney)": "epithelial cell of proximal tubule", + "S1 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", + "S3 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", + "Stromal cell_Ankrd1 high(Kidney)": "fibroblast", + "Stromal cell_Cxcl10 high(Kidney)": "fibroblast", + "Stromal cell_Dcn high(Kidney)": "fibroblast", + "Stromal cell_Mgp high(Fetal_Kidney)": "fibroblast", + "Stromal cell_Mgp high(Kidney)": "fibroblast", + "Stromal cell_Ptgds high(Kidney)": "fibroblast", + "T cell(Kidney)": "T cell", + "Thick ascending limb of the loop of Henle(Kidney)": "kidney loop of Henle ascending limb epithelial cell", + "Ureteric epithelium(Kidney)": "ureteric epithelial cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + 
self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py new file mode 100644 index 000000000..345d8a0eb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "liver" + + self.class_maps = { + "0": { + "B cell_Fcmr high(Liver)": "B cell", + "B cell_Jchain high(Liver)": "B cell", + "Dendritic cell_Cst3 high(Liver)": "dendritic cell", + "Dendritic cell_Siglech high(Liver)": "dendritic cell", + "Endothelial cell(Liver)": "endothelial cell of hepatic sinusoid", + "Epithelial cell(Liver)": "duct epithelial cell", + "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", + "Erythroblast_Hbb-bs high(Liver)": "erythroblast", + "Erythroblast_Hbb-bt high(Liver)": "erythroblast", + "Granulocyte(Liver)": "granulocyte", + "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", + "Hepatocyte_mt-Nd4 high(Liver)": "hepatocyte", + "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", + "Periportal (PP) hepatocyte(Liver)": "hepatocyte", + "Kuppfer cell(Liver)": "Kupffer cell", + "Macrophage_Chil3 high(Liver)": "macrophage", + "Neutrophil_Ngp high(Liver)": "neutrophil", + "Stromal cell(Liver)": "stromal cell", + "T cell_Gzma high(Liver)": 
"T cell", + "T cell_Trbc2 high(Liver)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py new file mode 100644 index 000000000..e9223074a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py @@ -0,0 +1,47 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "liver" + + self.class_maps = { + "0": { + "B cell_Jchain high(Liver)": "B cell", + "Dendritic cell_Cst3 high(Liver)": "dendritic cell", + "Dendritic cell_Siglech high(Liver)": "dendritic cell", + "Epithelial cell(Liver)": "duct epithelial cell", + "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", + "Erythroblast_Hbb-bs high(Liver)": "erythroblast", + "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", + "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", + "Periportal (PP) hepatocyte(Liver)": "hepatocyte", + "Kuppfer cell(Liver)": "Kupffer cell", + "Macrophage_Chil3 
high(Liver)": "macrophage", + "Stromal cell(Liver)": "stromal cell", + "T cell_Gzma high(Liver)": "T cell", + "T cell_Trbc2 high(Liver)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py new file mode 100644 index 000000000..8926dbf10 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py @@ -0,0 +1,64 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", 
+ "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + "Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py new file mode 100644 index 000000000..3f82240d9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py @@ -0,0 +1,64 
@@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", + "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + 
"Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py new file mode 100644 index 000000000..19b8775f3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py @@ -0,0 +1,64 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf 
high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", + "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + "Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py new file mode 100644 index 000000000..32b6e3f18 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "malegonad" + + self.class_maps = { + "0": { + "Elongating spermatid(Testis)": "elongating spermatid", + "Erythroblast_Hbb-bs high(Testis)": "erythroblast", + "Leydig cell(Testis)": "leydig cell", + "Macrophage_Lyz2 high(Testis)": "macrophage", + "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", + "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", + "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", + "Sertoli cell(Testis)": "sertoli cell", + "Spermatids_1700016P04Rik high(Testis)": "spermatid", + "Spermatids_Cst13 high(Testis)": "spermatid", + "Spermatids_Hmgb4 high(Testis)": "spermatid", + "Spermatids_Tnp1 high(Testis)": "spermatid", + "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", + "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", + "Spermatocyte_Calm2 high(Testis)": "spermatocyte", + "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", + "Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", + "Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", + "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" + }, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py new file mode 100644 index 000000000..67da67428 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "malegonad" + + self.class_maps = { + "0": { + "Elongating spermatid(Testis)": "elongating spermatid", + "Erythroblast_Hbb-bs high(Testis)": "erythroblast", + "Leydig cell(Testis)": "leydig cell", + "Macrophage_Lyz2 high(Testis)": "macrophage", + "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", + "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", + "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", + "Sertoli cell(Testis)": "sertoli cell", + "Spermatids_1700016P04Rik high(Testis)": "spermatid", + "Spermatids_Cst13 high(Testis)": "spermatid", + "Spermatids_Hmgb4 high(Testis)": "spermatid", + "Spermatids_Tnp1 high(Testis)": 
"spermatid", + "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", + "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", + "Spermatocyte_Calm2 high(Testis)": "spermatocyte", + "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", + "Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", + "Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", + "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py new file mode 100644 index 000000000..350514e31 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain 
high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py new file mode 100644 index 000000000..a8e2bca14 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): 
+ + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py new file mode 100644 index 000000000..5f3bab9bd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor 
cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py new file mode 100644 index 000000000..b6c0351df --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": 
"dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py new file mode 100644 index 000000000..34860727d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py @@ -0,0 +1,49 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + 
cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "muscle" + + self.class_maps = { + "0": { + "B cell_Jchain high(Muscle)": "B cell", + "B cell_Vpreb3 high(Muscle)": "B cell", + "Dendritic cell(Muscle)": "dendritic cell", + "Endothelial cell(Muscle)": "endothelial cell", + "Erythroblast_Car1 high(Muscle)": "erythroblast", + "Erythroblast_Car2 high(Muscle)": "erythroblast", + "Granulocyte monocyte progenitor cell(Muscle)": "monocyte progenitor", + "Macrophage_Ms4a6c high(Muscle)": "macrophage", + "Macrophage_Retnla high(Muscle)": "macrophage", + "Muscle cell_Tnnc1 high(Muscle)": "muscle cell", + "Muscle cell_Tnnc2 high(Muscle)": "muscle cell", + "Muscle progenitor cell(Muscle)": "skeletal muscle satellite cell", + "Neutrophil_Camp high(Muscle)": "neutrophil", + "Neutrophil_Prg2 high(Muscle)": "neutrophil", + "Neutrophil_Retnlg high(Muscle)": "neutrophil", + "Stromal cell(Muscle)": "stromal cell", + "T cell(Muscle)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py new file mode 100644 index 000000000..58acfa317 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py 
@@ -0,0 +1,54 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "Acinar cell(Pancreas)": "pancreatic acinar cell", + "Dendrtic cell(Pancreas)": "dendritic cell", + "Ductal cell(Pancreas)": "pancreatic ductal cell", + "Endocrine cell(Pancreas)": "endocrine cell", + "Dividing cell(Pancreas)": "endocrine cell", + "Endothelial cell_Fabp4 high(Pancreas)": "endothelial cell", + "Endothelial cell_Lrg1 high(Pancreas)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Pancreas)": "endothelial cell", + "Erythroblast_Hbb-bt high(Pancreas)": "erythroblast", + "Erythroblast_Igkc high(Pancreas)": "erythroblast", + "Granulocyte(Pancreas)": "granulocyte", + "Macrophage_Ly6c2 high(Pancreas)": "macrophage", + "Macrophage(Pancreas)": "macrophage", + "Glial cell(Pancreas)": "glial cell", + "Smooth muscle cell_Acta2 high(Pancreas)": "smooth muscle cell", + "Smooth muscle cell_Rgs5 high(Pancreas)": "smooth muscle cell", + "Stromal cell_Fn1 high(Pancreas)": "stromal cell", + "Stromal cell_Mfap4 high(Pancreas)": "stromal cell", + "Stromal cell_Smoc2 high(Pancreas)": "stromal cell", + "T cell(Pancreas)": "t cell", + "B cell(Pancreas)": "b cell", + "β-cell(Pancreas)": "pancreatic B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"Pancreas_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py new file mode 100644 index 000000000..12e4fcd7a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py @@ -0,0 +1,60 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "placenta" + + self.class_maps = { + "0": { + "B cell(Placenta)": "B cell", + "Basophil(Placenta)": "basophil", + "Decidual stromal cell(Placenta)": "decidual stromal cell", + "Dendritic cell(Placenta)": "dendritic cell", + "Endodermal cell_Afp high(Placenta)": "endodermal cell", + "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", + "Erythroblast_Hbb-y high(Placenta)": "erythroblast", + "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", + "Granulocyte_Neat1 high(Placenta)": "granulocyte", + "Granulocyte_S100a9 high(Placenta)": "granulocyte", + "HSPC_Lmo2 high(Placenta)": "HSPC", + "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", + "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", + "Macrophage_Apoe high(Placenta)": "macrophage", + "Macrophage_Spp1 high(Placenta)": 
"macrophage", + "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", + "Monocyte(Placenta)": "monocyte", + "NK cell(Placenta)": "NK cell", + "NKT cell(Placenta)": "NKT cell", + "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", + "PE lineage cell_S100g high(Placenta)": "PE lineage cell", + "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", + "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", + "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", + "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", + "Stromal cell(Placenta)": "stromal cell", + "Stromal cell_Acta2 high(Placenta)": "stromal cell", + "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py new file mode 100644 index 000000000..e62aaa0af --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py @@ -0,0 +1,60 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "placenta" + + self.class_maps = { + "0": { + "B cell(Placenta)": "B cell", + "Basophil(Placenta)": "basophil", + "Decidual stromal cell(Placenta)": "decidual stromal cell", + "Dendritic cell(Placenta)": "dendritic cell", + "Endodermal cell_Afp high(Placenta)": "endodermal cell", + "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", + "Erythroblast_Hbb-y high(Placenta)": "erythroblast", + "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", + "Granulocyte_Neat1 high(Placenta)": "granulocyte", + "Granulocyte_S100a9 high(Placenta)": "granulocyte", + "HSPC_Lmo2 high(Placenta)": "HSPC", + "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", + "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", + "Macrophage_Apoe high(Placenta)": "macrophage", + "Macrophage_Spp1 high(Placenta)": "macrophage", + "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", + "Monocyte(Placenta)": "monocyte", + "NK cell(Placenta)": "NK cell", + "NKT cell(Placenta)": "NKT cell", + "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", + "PE lineage cell_S100g high(Placenta)": "PE lineage cell", + "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", + "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", + "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", + "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", + "Stromal cell(Placenta)": "stromal cell", + "Stromal cell_Acta2 high(Placenta)": "stromal cell", + "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, 
"mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py new file mode 100644 index 000000000..52baaaa0b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py @@ -0,0 +1,38 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "prostate" + + self.class_maps = { + "0": { + "Dendritic cell(Prostate)": "dendritic cell", + "Epithelial cell(Prostate)": "epithelial cell", + "Glandular epithelium(Prostate)": "glandular epithelial cell", + "Prostate gland cell(Prostate)": "glandular cell", + "Stromal cell(Prostate)": "stromal cell", + "T cell(Prostate)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py new file mode 100644 index 000000000..ddcaa51ec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py @@ -0,0 +1,38 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "prostate" + + self.class_maps = { + "0": { + "Dendritic cell(Prostate)": "dendritic cell", + "Epithelial cell(Prostate)": "epithelial cell", + "Glandular epithelium(Prostate)": "glandular epithelial cell", + "Prostate gland cell(Prostate)": "glandular cell", + "Stromal cell(Prostate)": "stromal cell", + "T cell(Prostate)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py new file mode 100644 index 000000000..7947f5881 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 
high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py new file mode 100644 index 000000000..9dbbab288 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 
high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py new file mode 100644 index 000000000..d1461dd33 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, 
fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py new file mode 100644 index 000000000..dba49eeff --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py @@ -0,0 +1,43 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "spleen" + + self.class_maps = { + "0": { + "Erythroblast(Spleen)": "proerythroblast", + "Dendritic cell_S100a4 high(Spleen)": "dendritic cell", + "Dendritic cell_Siglech high(Spleen)": "dendritic cell", + "Granulocyte(Spleen)": "granulocyte", + "Macrophage(Spleen)": "macrophage", + "Monocyte(Spleen)": "monocyte", + "NK cell(Spleen)": "NK cell", + "Neutrophil(Spleen)": "neutrophil", + "Plasma cell(Spleen)": "plasma cell", + "T cell(Spleen)": "T cell", + "Marginal zone B cell(Spleen)": "B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py new file mode 100644 index 000000000..184f6cb13 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py @@ -0,0 +1,49 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "stomach" + + self.class_maps = { + "0": { + "Antral mucous cell (Stomach)": "antral mucous cell", + "Dendritic cell(Stomach)": "dendritic cell", + "Dividing cell(Stomach)": "proliferative cell", + "Epithelial cell_Gkn3 high(Stomach)": "epithelial cell", + "Epithelial cell_Krt20 high(Stomach)": "epithelial cell", + "Epithelial cell_Pla2g1b high(Stomach)": "epithelial cell", + "G cell(Stomach)": "G cell", + "Gastric mucosal cell(Stomach)": "gastric mucosal cell", + "Macrophage(Stomach)": "macrophage", + "Muscle cell(Stomach)": "muscle cell", + "Parietal cell (Stomach)": "parietal cell", + "Pit cell_Gm26917 high(Stomach)": "pit cell", + "Pit cell_Ifrd1 high(Stomach)": "pit cell", + "Stomach cell_Gkn2 high(Stomach)": "stomach cell", + "Stomach cell_Mt2 high(Stomach)": "stomach cell", + "Stomach cell_Muc5ac high(Stomach)": "stomach cell", + "Tuft cell(Stomach)": "tuft cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", 
"Stomach_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py new file mode 100644 index 000000000..4f5f041a5 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py @@ -0,0 +1,40 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "thymus" + + self.class_maps = { + "0": { + "abT cell(Thymus)": "abT cell", + "B cell(Thymus)": "B cell", + "DPT cell(Thymus)": "double positive T cell", + "gdT cell (Thymus)": "gdT cell", + "Pre T cell(Thymus)": "immature T cell", + "Proliferating thymocyte(Thymus)": "immature T cell", + "T cell_Id2 high(Thymus)": "abT cell", # TODO check, not sure about this gene + "T cell_Ms4a4b high(Thymus)": "abT cell" # TODO check, not sure about this gene + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py new file mode 100644 index 000000000..fd148575f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "uterus" + + self.class_maps = { + "0": { + "B cell(Uterus)": "B cell", + "Dendritic cell(Uterus)": "dendritic cell", + "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", + "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", + "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", + "Granulocyte(Uterus)": "granulocyte", + "Keratinocyte(Uterus)": "keratinocyte", + "Macrophage(Uterus)": "macrophage", + "Monocyte(Uterus)": "monocyte", + "Muscle cell_Mgp high(Uterus)": "muscle cell", + "Muscle cell_Pcp4 high(Uterus)": "muscle cell", + "Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", + "NK cell(Uterus)": "NK cell", + "Stromal cell_Ccl11 high(Uterus)": "stromal cell", + "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", + "Stromal cell_Gm23935 high(Uterus)": "stromal cell", + "Stromal cell_Has1 high(Uterus)": "stromal cell", + "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", + }, + } + + def 
_load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py new file mode 100644 index 000000000..b9c6ae41c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "uterus" + + self.class_maps = { + "0": { + "B cell(Uterus)": "B cell", + "Dendritic cell(Uterus)": "dendritic cell", + "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", + "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", + "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", + "Granulocyte(Uterus)": "granulocyte", + "Keratinocyte(Uterus)": "keratinocyte", + "Macrophage(Uterus)": "macrophage", + "Monocyte(Uterus)": "monocyte", + "Muscle cell_Mgp high(Uterus)": "muscle cell", + "Muscle cell_Pcp4 high(Uterus)": "muscle cell", + 
"Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", + "NK cell(Uterus)": "NK cell", + "Stromal cell_Ccl11 high(Uterus)": "stromal cell", + "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", + "Stromal cell_Gm23935 high(Uterus)": "stromal cell", + "Stromal cell_Has1 high(Uterus)": "stromal cell", + "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py new file mode 100644 index 000000000..e40cb5c55 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -0,0 +1,89 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" + + self.download = 
"https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" + self.download_meta = "private" + + self.author = "Simmons" + self.doi = "10.1016/j.cell.2018.08.067" + self.normalization = "raw" + self.organ = "colon" # ToDo: "lamina propria of mucosa of colon" + self.organism = "human" + self.protocol = "10x" + self.year = 2019 + + self.var_symbol_col = "names" + self.var_ensembl_col = "Accession" + + self.obs_key_state_exact = "donor_organism.diseases.ontology_label" + self.obs_key_healthy = self.obs_key_state_exact + self.healthy_state_healthy = "normal" + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Endothelial 1": "Endothelial", + "Endothelial 2": "Endothelial", + "Glial": "Glial cells", + "Myofibroblasts": "Myofibroblasts", + "Pericyte 1": "Pericytes", + "Pericyte 2": "Pericytes", + "Pericytes": "Pericytes", + "Plasma Cells": "Plasma Cells", + "Smooth Muscle": "Smooth Muscle", + "Stromal 1": "Stromal", + "Stromal 2a": "Stromal", + "Stromal 2b": "Stromal", + "Stromal 3": "Stromal", + "Stromal 4": "Stromal", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") + ] + adata = anndata.read_loom(fn[0]) + ctuc = pd.read_csv(fn[1], sep="\t") + cthealthy = pd.read_csv(fn[2], sep="\t") + adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() + adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() + uc = adata[adata.obs["donor_organism.diseases.ontology_label"] == "ulcerative colitis (disease)"].copy() + bcuc = [i.split("-")[0] for i in ctuc["Barcode"]] + seluc = [] + for i in uc.obs["barcode"]: + seluc.append((uc.obs["barcode"].str.count(i).sum() == 1) and i in bcuc) + uc = 
uc[seluc].copy() + ctuc.index = [i.split("-")[0] for i in ctuc["Barcode"]] + uc.obs["celltype"] = [ctuc.loc[i]["Cluster"] for i in uc.obs["barcode"]] + uc.var = uc.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") + healthy = adata[adata.obs["donor_organism.diseases.ontology_label"] == "normal"].copy() + bchealthy = [i.split("-")[0] for i in cthealthy["Barcode"]] + selhealthy = [] + for i in healthy.obs["barcode"]: + selhealthy.append((healthy.obs["barcode"].str.count(i).sum() == 1) and i in bchealthy) + healthy = healthy[selhealthy].copy() + cthealthy.index = [i.split("-")[0] for i in cthealthy["Barcode"]] + healthy.obs["celltype"] = [cthealthy.loc[i]["Cluster"] for i in healthy.obs["barcode"]] + healthy.var = healthy.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") + self.adata = healthy.concatenate(uc) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py new file mode 100644 index 000000000..9c4f27a3e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -0,0 +1,88 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" + + self.download = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_meta = None + + self.author = "Regev" + self.doi = "10.1016/j.cell.2019.06.029" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" # ToDo: "colonic epithelium" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Cycling TA": "Cycling TA", + "TA 1": "TA 1", + "TA 2": "TA 2", + "Immature Enterocytes 2": "Immature Enterocytes 2", + "Immature Enterocytes 1": "Immature Enterocytes 1", + "Enterocyte Progenitors": "Enterocyte Progenitors", + "Immature Goblet": "Immature Goblet", + "Enterocytes": "Enterocytes", + "Secretory TA": "Secretory TA", + "Best4+ Enterocytes": "Best4+ Enterocytes", + "CD8+ IELs": "CD8+ IELs", + "Goblet": "Goblet cells", + "Stem": "Stem cells", + "Tuft": "Tuft", + "Follicular": "Follicular", + "Enteroendocrine": "Enteroendocrine cells", + "Plasma": "Plasma Cells", + "CD4+ Memory": "CD4+ Memory", + "CD8+ LP": "CD8+ LP", + "CD69- Mast": "CD69- Mast", + "Macrophages": "Macrophage", + "GC": "Glial cells", + "Cycling B": "B cell cycling", + "CD4+ Activated Fos-hi": "CD4+ T Activated Fos-hi", + "CD4+ Activated Fos-lo": "CD4+ T Activated Fos-lo", + "NKs": "NK", + "Cycling T": "Cycling T", + "M cells": "M cells", + "CD69+ Mast": "CD69+ Mast", + "MT-hi": "MT-hi", + "CD8+ IL17+": "CD8+ IL17+", + "CD4+ PD1+": "CD4+ PD1+", + "DC2": "DC2", + "Treg": "Treg", + "ILCs": "ILC", + "DC1": "DC1", + "WNT2B+ Fos-lo 1": "WNT2B+ Fos-lo 1", + "WNT5B+ 2": "WNT5B+ 2", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py new file mode 100644 index 000000000..bba8ea11c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -0,0 +1,70 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" + + self.download = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" + self.download_meta = None + + self.author = "Kenigsberg" + self.doi = "10.1016/j.cell.2019.08.008" + self.healthy = True + self.normalization = "raw" + self.organ = "ileum" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "T cells": "T cells", + "Plasma cells": "Plasma Cells", + "B cells": "B cells", + "MNP": "MNP", + "ILC": "ILC", + "Enterocytes": "Enterocytes", + "Fibs": "Fibroblasts", + "CD36+ endothelium": "CD36+
endothelium", + "Progenitors": "Progenitors", + "Goblets": "Goblet cells", + "Glial cells": "Glial cells", + "Cycling": "Cycling", + "ACKR1+ endothelium": "ACKR1+ endothelium", + "Pericytes": "Pericytes", + "Lymphatics": "Lymphatics", + "Mast cells": "Mast cells", + "SM": "Smooth muscle cell", + "TA": "TA", + "Paneth cells": "Paneth cells", + "Enteroendocrines": "Enteroendocrine cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + self.adata = self.adata[self.adata.obs["CellType"] != "Doublets"].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py new file mode 100644 index 000000000..c5222d562 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -0,0 +1,58 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" + + self.download = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" + self.download_meta = None + + self.author = "Strand" + self.doi = "10.1016/j.celrep.2018.11.086" + self.healthy = True + self.normalization = "raw" + self.state_exact = "healthy" + self.organ = "prostate" + self.organism = "human" + self.protocol = "10x" + self.year = 2018 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Basal": "Basal cell", + "Hillock": "Hillock", + "Luminal": "Luminal", + "Endothelia": "Endothelial cell", + "Club": "Club", + "Fibroblast": "Fibroblast", + "Smooth muscle": "Smooth muscle cell", + "Leukocytes": "Leukocytes", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py new file mode 100644 index 000000000..6b28e777a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -0,0 +1,64 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class 
Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" + + self.download = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" + self.download_meta = None + + self.author = "Yanai" + self.doi = "10.1016/j.cels.2016.08.011" + self.healthy = True + self.normalization = "raw" + self.organ = "pancreas" + self.organism = "human" + self.protocol = "inDrop" + self.state_exact = "healthy" + self.year = 2016 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "t_cell": "T cell", + "quiescent_stellate": "Quiescent Stellate cell", + "mast": "Mast cell", + "delta": "Delta cell", + "beta": "Beta cell", + "endothelial": "Endothelial cell", + "macrophage": "Macrophage", + "epsilon": "Epsilon cell", + "activated_stellate": "Activated Stellate cell", + "acinar": "Acinar cell", + "alpha": "Alpha cell", + "ductal": "Ductal cell", + "schwann": "Schwann cell", + "gamma": "Gamma cell", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py new file mode 100644 index 000000000..d03786716 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -0,0 +1,72 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" + + self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" + self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" + + self.author = "Sandberg" + self.doi = "10.1016/j.cmet.2016.08.020" + self.normalization = "raw" + self.organ = "pancreas" + self.organism = "human" + self.protocol = "Smartseq2" + self.year = 2016 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Characteristics[cell type]" + self.obs_key_state_exact = "Characteristics[disease]" + self.obs_key_healthy = self.obs_key_state_exact + + self.healthy_state_healthy = "normal" + + self.class_maps = { + "0": { + "alpha cell": "Alpha cell", + "ductal cell": "Ductal cell", + "beta cell": "Beta cell", + "gamma cell": "Gamma cell", + "acinar cell": "Acinar cell", + "delta cell": "Delta cell", + "PSC cell": "PSC cell", + "unclassified endocrine cell": "Unclassified endocrine cell", + "co-expression cell": "Co-expression cell", + "endothelial cell": "Endothelial cell", + "epsilon cell": "Epsilon cell", + "mast cell": "Mast cell", + 
"MHC class II cell": "MHC class II cell", + "unclassified cell": "Unknown", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") + ] + df = pd.read_csv(fn[0], sep="\t") + df.index = df.index.get_level_values(0) + df = df.drop("#samples", axis=1) + df = df.T.iloc[:, :26178] + self.adata = anndata.AnnData(df) + self.adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[self.adata.obs.index] + # filter observations which are not cells (empty wells, low quality cells etc.) + self.adata = self.adata[self.adata.obs["Characteristics[cell type]"] != "not applicable"].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py new file mode 100644 index 000000000..2c89b9310 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py @@ -0,0 +1,59 @@ +import anndata +import numpy as np +import os +import pandas +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1016_j_cmet_2019_01_021(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" + + self.author = "Bhushan" + self.doi = "10.1016/j.cmet.2019.01.021" + self.healthy = False + 
self.normalization = "raw" + self.organ = "pancreas" + self.organism = "mouse" + self.protocol = "10x" + self.state_exact = "diabetic" + self.year = 2019 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "acinar": "pancreatic acinar cell", + "ductal": "pancreatic ductal cell", + "leukocyte": "leukocyte", + "T cell(Pancreas)": "t cell", + "B cell(Pancreas)": "b cell", + "beta": "pancreatic B cell", + "alpha": "pancreatic A cell", + "delta": "pancreatic D cell", + "pp": "pancreatic PP cell", + "smooth_muscle": "smooth muscle cell", + "stellate cell": "pancreatic stellate cell", + "fibroblast": "stromal cell", + "endothelial": "endothelial cell" + }, + } + + def _load_generalized(self, fn, fn_meta): + celltypes = pandas.read_csv(fn_meta, index_col=0) + + self.adata = anndata.read_mtx(fn + "_matrix.mtx.gz").transpose() + self.adata.var_names = np.genfromtxt(fn + "_genes.tsv.gz", dtype=str)[:, 1] + self.adata.obs_names = np.genfromtxt(fn + "_barcodes.tsv.gz", dtype=str) + self.adata.var_names_make_unique() + self.adata = self.adata[celltypes.index] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py new file mode 100644 index 000000000..5128278fd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_pancreas_2019_10x_thompson_001_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py new file mode 100644 index 000000000..142e0f759 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_002_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py new file mode
100644 index 000000000..e8ba6f466 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_003_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py new file mode 100644 index 000000000..50d2cf114 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_004_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise
ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py new file mode 100644 index 000000000..06b60eaaa --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_005_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py new file mode 100644 index 000000000..e9c96a3e5 --- /dev/null +++
b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_006_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py new file mode 100644 index 000000000..efc3678ed --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_007_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in
constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py new file mode 100644 index 000000000..b64c76432 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_008_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") + else: + fn_meta = fn + "_annotation.csv" # fn is a file-name prefix, not a directory + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git
class Dataset(DatasetBase):
    """
    Loader for the human lung data set with DOI 10.1016/j.devcel.2020.01.033 (miller20.processed.h5ad).
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        """
        :param path: Root data directory, used by ``_load`` to build the default file path.
        :param meta_path: Forwarded to the base class.
        :param cache_path: Forwarded to the base class.
        :param kwargs: Forwarded to the base class.
        """
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033"

        self.download = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad"
        self.download_meta = None

        self.author = "Spence"
        self.doi = "10.1016/j.devcel.2020.01.033"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "lung"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2020

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "Cell_type"

        # Maps the labels found in obs["Cell_type"] to the label set used by this package.
        self.class_maps = {
            "0": {
                "Airway Smooth Muscle": "Airway smooth muscle",
                "Basal cell": "Basal",
                "Bud tip adjacent": "Fetal airway progenitors",
                "Bud tip progenitor": "Fetal airway progenitors",
                "Cartilage": "Cartilage",
                "Club-like secretory": "Secretory",
                "Endothelial": "1_Endothelial",
                "Epithelial": "1_Epithelial",
                "Goblet-like secretory": "Secretory",
                "Hematopoietic, B Cells": "B cell lineage",
                "Hematopoietic, Macrophage": "Macrophages",
                "Hematopoietic, Natural Killer Cell": "Innate lymphoid cells",
                "Hematopoietic, T Cells": "T cell lineage",
                "Immune": "1_Immune",
                "Intermediate ciliated": "Multiciliated lineage",
                "Mesenchyme RSPO2+": "1_Stroma",
                "Mesenchyme SERPINF1-high": "1_Stroma",
                "Multiciliated cell": "Multiciliated lineage",
                "Multiciliated precursor": "Multiciliated lineage",
                "Neuroendocrine": "Rare",
                "Pericyte": "Fibroblasts",
                "RBC": "Erythrocytes",
                "Secretory progenitor": "Secretory",
                "Submucosal gland": "Submucosal Secretory",
                "Submucosal gland basal": "Submucosal Secretory",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` and rescale the expression matrix.

        :param fn: Path of the h5ad file. If None, the default location below ``self.path`` is used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = os.path.join(self.path, "human", "lung", "miller20.processed.h5ad")
        # read_h5ad is the reader behind the deprecated anndata.read alias.
        self.adata = anndata.read_h5ad(fn)
        # NOTE(review): presumably X holds log1p values of counts scaled to 10,000 per cell; the two
        # steps below would then restore counts using the per-cell totals in obs["nUMI"] - confirm.
        # .multiply is the sparse element-wise product, so X is expected to be a scipy sparse matrix.
        self.adata.X = np.expm1(self.adata.X)
        self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None]))\
            .multiply(1 / 10000)

        self.set_unkown_class_id(ids=["1_Unicorns and artifacts"])
def _load(self, fn=None):
    """
    Read habib17.processed.h5ad into ``self.adata`` and rescale the expression matrix.

    :param fn: Path of the h5ad file. If None, the default location below ``self.path`` is used.
    :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
    """
    if fn is None:
        if self.path is None:
            # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
            raise ValueError("provide either fn in load or path in constructor")
        fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad")
    # read_h5ad is the reader behind the deprecated anndata.read alias.
    self.adata = anndata.read_h5ad(fn)
    # NOTE(review): presumably X holds log1p values of counts scaled to 10,000 per cell; the two
    # steps below would then restore counts using the per-cell totals in obs["n_counts"] - confirm.
    # .multiply is the sparse element-wise product, so X is expected to be a scipy sparse matrix.
    self.adata.X = np.expm1(self.adata.X)
    self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\
        .multiply(1 / 10000)
class Dataset(DatasetBase):
    """
    Loader for the human male gonad data set with DOI 10.1038/s41422-018-0099-2 (guo18_donor.processed.h5ad).
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        """
        :param path: Root data directory, used by ``_load`` to build the default file path.
        :param meta_path: Forwarded to the base class.
        :param cache_path: Forwarded to the base class.
        :param kwargs: Forwarded to the base class.
        """
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2"

        self.download = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad"
        self.download_meta = None

        self.author = "Cairns"
        self.doi = "10.1038/s41422-018-0099-2"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "malegonad"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Every original label in obs["CellType"] is mapped onto itself.
        self.class_maps = {
            "0": {
                "Elongated Spermatids": "Elongated Spermatids",
                "Leydig cells": "Leydig cells",
                "Early Primary Spermatocytes": "Early Primary Spermatocytes",
                "Round Spermatids": "Round Spermatids",
                "Endothelial cells": "Endothelial cells",
                "Macrophages": "Macrophages",
                "Myoid cells": "Myoid cells",
                "Differentiating Spermatogonia": "Differentiating Spermatogonia",
                "Late primary Spermatocytes": "Late primary Spermatocytes",
                "Spermatogonial Stem cell": "Spermatogonial Stem cell",
                "Sertoli cells": "Sertoli cells",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` and rescale the expression matrix.

        :param fn: Path of the h5ad file. If None, the default location below ``self.path`` is used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad")
        # read_h5ad is the reader behind the deprecated anndata.read alias.
        self.adata = anndata.read_h5ad(fn)
        # NOTE(review): presumably X holds log1p values of counts scaled to 10,000 per cell; the two
        # steps below would then restore counts using the per-cell totals in obs["n_counts"] - confirm.
        # .multiply is the sparse element-wise product, so X is expected to be a scipy sparse matrix.
        self.adata.X = np.expm1(self.adata.X)
        self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\
            .multiply(1 / 10000)
class Dataset(DatasetBase):
    """
    Loader for the human liver data set with DOI 10.1038/s41467-018-06318-7 (GSE115469).
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        """
        :param path: Root data directory, used by ``_load`` to build the default file paths.
        :param meta_path: Forwarded to the base class.
        :param cache_path: Forwarded to the base class.
        :param kwargs: Forwarded to the base class.
        """
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7"

        self.download = "private"
        self.download_meta = "private"

        self.author = "McGilvray"
        self.doi = "10.1038/s41467-018-06318-7"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "liver"  # ToDo: "caudate lobe"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "celltype"

        # Keys are the cluster numbers from the label file, stored as strings by _load.
        self.class_maps = {
            "0": {
                "1": "Hepatocyte 1",
                "2": "Alpha beta T cells",
                "3": "Hepatocyte 2",
                "4": "Inflammatory macrophages",
                "5": "Hepatocyte 3",
                "6": "Hepatocyte 4",
                "7": "Plasma cells",
                "8": "NK cell",
                "9": "Gamma delta T cells 1",
                "10": "Non inflammatory macrophages",
                "11": "Periportal LSECs",
                "12": "Central venous LSECs",
                "13": "Endothelial cell",
                "14": "Hepatocyte 5",
                "15": "Hepatocyte 6",
                "16": "Mature B cells",
                "17": "Cholangiocytes",
                "18": "Gamma delta T cells 2",
                "19": "Erythroid cells",
                "20": "Hepatic stellate cells"
            },
        }

    def _load(self, fn=None):
        """
        Read the count matrix and attach the cluster number of every cell as ``obs["celltype"]``.

        :param fn: Sequence of two paths (count matrix csv, label table). If None, the default locations
            below ``self.path`` are used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        :raises KeyError: If a cell of the count matrix has no entry in the label table.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = [
                os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"),
                os.path.join(self.path, "human", "liver", "GSE115469_labels.txt")
            ]
        # The matrix is transposed after reading so that cells end up in obs.
        self.adata = anndata.read_csv(fn[0]).T
        celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName")
        # One label-based lookup for all cells instead of one row lookup per cell.
        clusters = celltype_df.loc[self.adata.obs.index, "Cluster#"]
        self.adata.obs["celltype"] = [str(cluster) for cluster in clusters]
"https://ftp.ncbi.nlm.nih.gov/geo/series/" \ + "GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" + + self.author = "Jain" + self.doi = "10.1038/s41467-019-10861-2" + self.healthy = True + self.normalization = "raw" + self.organ = "kidney" + self.organism = "human" + self.protocol = "10xSn" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Collecting Duct - Intercalated Cells Type A (cortex)": "Collecting Duct - Intercalated Cells Type A (cortex)", + "Collecting Duct - Intercalated Cells Type A (medulla)": "Collecting Duct - Intercalated Cells Type A (medulla)", + "Collecting Duct - Intercalated Cells Type B": "Collecting Duct - Intercalated Cells Type B", + "Collecting Duct - PCs - Stressed Dissoc Subset": "Collecting Duct - PCs - Stressed Dissoc Subset", + "Collecting Duct - Principal Cells (cortex)": "Collecting Duct - Principal Cells (cortex)", + "Collecting Duct - Principal Cells (medulla)": "Collecting Duct - Principal Cells (medulla)", + "Connecting Tubule": "Connecting tubule", + "Decending Limb": "Decending Limb", + "Distal Convoluted Tubule": "Distal Convoluted Tubule", + "Endothelial Cells (unassigned)": "Endothelial Cells (unassigned)", + "Endothelial Cells - AEA & DVR ": "Endothelial Cells - AEA & DVR", + "Endothelial Cells - AVR": "Endothelial Cells - AVR", + "Endothelial Cells - glomerular capillaries": "Endothelial Cells - glomerular capillaries", + "Epithelial Cells (unassigned)": "Epithelial Cells (unassigned)", + "Immune Cells - Macrophages": "Macrophage", + "Interstitium": "Interstitium", + "Mesangial Cells": "Mesangial Cells", + "Podocytes": "Podocyte", + "Proximal Tubule Epithelial Cells (S1)": "Proximal Tubule Epithelial Cells (S1)", + "Proximal Tubule Epithelial Cells (S2)": "Proximal Tubule Epithelial Cells (S2)", + "Proximal Tubule Epithelial Cells (S3)": "Proximal 
def _load(self, fn=None):
    """
    Read the UMI matrix and annotate every cell with the label of its cluster.

    :param fn: Sequence of two paths (UMI matrix tsv, cluster annotation csv). If None, the default
        locations below ``self.path`` are used.
    """
    if fn is None:
        kidney_dir = os.path.join(self.path, "human", "kidney")
        fn = [
            os.path.join(kidney_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"),
            os.path.join(kidney_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz")
        ]
    umi_table = pd.read_csv(fn[0], sep="\t")
    # Transposed so that the columns of the table become the observations.
    self.adata = anndata.AnnData(umi_table.T)
    cluster_table = pd.read_csv(fn[1], index_col=0, dtype="category")

    def label_of(cell_name):
        # The lookup key is the part before the first "_" without its first character.
        cluster_key = cell_name.split("_")[0][1:]
        return cluster_table.loc[cluster_key]["Annotation"]

    self.adata.obs["celltype"] = list(map(label_of, self.adata.obs.index))
def _load(self, fn=None):
    """
    Assemble ``self.adata`` from the per-sample tables in the GEO tar archive and annotate the cells.

    Only archive members whose name contains one of the sample IDs listed below are used. Cell labels
    are read from the two donor annotation tables; cells without an entry are labelled "Unknown".

    :param fn: Sequence of three paths (tar archive, donor 1 annotation, donor 2 annotation). If None,
        the default locations below ``self.path`` are used.
    """
    if fn is None:
        fn = [
            os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"),
            os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"),
            os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"),
        ]
    # Sample ID contained in the member name -> (donor, organ).
    sample_info = {
        "PP001": ("Donor1", "Lung"),
        "PP002": ("Donor1", "Lung"),
        "PP003": ("Donor1", "Bone Marrow"),
        "PP004": ("Donor1", "Bone Marrow"),
        "PP005": ("Donor1", "Lymph Node"),
        "PP006": ("Donor1", "Lymph Node"),
        "PP009": ("Donor2", "Lung"),
        "PP010": ("Donor2", "Lung"),
        "PP011": ("Donor2", "Bone Marrow"),
        "PP012": ("Donor2", "Bone Marrow"),
        "PP013": ("Donor2", "Lymph Node"),
        "PP014": ("Donor2", "Lymph Node"),
    }
    adatas = []
    with tarfile.open(fn[0]) as tar:
        for member in tar.getmembers():
            info = next((v for k, v in sample_info.items() if k in member.name), None)
            if info is None:
                # Not one of the listed samples: skip it before spending time on parsing.
                continue
            donor, organ = info
            df = pd.read_csv(tar.extractfile(member.name), compression="gzip", sep="\t")
            # Index the genes by accession without the part after the first ".".
            df.index = [i.split(".")[0] for i in df["Accession"]]
            # axis has to be passed by keyword; pandas 2 no longer accepts it positionally.
            var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], axis=1)
            if df.columns[-1].startswith("Un"):
                # presumably an empty "Unnamed: N" column produced by a trailing separator - TODO confirm
                df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True)
            # Local variable so that self.adata is never left pointing at a single sample.
            adata = anndata.AnnData(df.T)
            adata.var = var
            adata.obs["donor"] = donor
            adata.obs["organ"] = organ
            adata.obs.index = member.name.split("_")[1].split("s")[0] + "nskept." + adata.obs.index
            adatas.append(adata)
    # NOTE(review): AnnData.concatenate is deprecated upstream in favour of anndata.concat; it is kept
    # because the two differ in how var columns and the "batch" column are handled.
    self.adata = adatas[0].concatenate(adatas[1:], index_unique=None)
    self.adata.obs.drop("batch", axis=1, inplace=True)
    # Keep only genes with a positive total count.
    self.adata = self.adata[:, self.adata.X.sum(axis=0) > 0].copy()

    df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None)
    df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None)
    # Build one lookup (donor 2 entries override donor 1) and assign the column in a single step.
    # Writing through obs[col].loc[i] is chained assignment and is not guaranteed to reach obs.
    labels = {}
    for annotation in (df1, df2):
        labels.update(annotation[1].to_dict())
    self.adata.obs["cell_ontology_class"] = [labels.get(cell, "Unknown") for cell in self.adata.obs.index]
    self.adata.X = scipy.sparse.csc_matrix(self.adata.X)

    # TODO we should move this code into the base class
    # If the subset_organs() method has been run before, subset to specified organs
    if "organsubset" in self.__dict__:
        self.adata = self.adata[self.adata.obs["organ"].isin(self.organsubset)]
    # If adata object is empty, set it to None
    if not len(self.adata):
        self.adata = None
    self.loaded = True
class Dataset(DatasetBase):
    """
    Loader for the human eye data set with DOI 10.1038/s41467-019-12780-8 (menon19.processed.h5ad).
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        """
        :param path: Root data directory, used by ``_load`` to build the default file path.
        :param meta_path: Forwarded to the base class.
        :param cache_path: Forwarded to the base class.
        :param kwargs: Forwarded to the base class.
        """
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8"

        self.download = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad"
        self.download_meta = None

        self.author = "Hafler"
        self.doi = "10.1038/s41467-019-12780-8"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "eye"  # ToDo: "retina"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Maps the labels found in obs["CellType"] to the label set used by this package.
        self.class_maps = {
            "0": {
                "ACs": "Amacrine cell",
                "BPs": "BPs",
                "Cones": "Retinal cone cell",
                "Endo": "Endothelial cell",
                "HCs": "Horizontal cells",
                "Macroglia": "Macroglia",
                "Microglia": "Microglia",
                "RGCs": "Retinal ganglion cell",
                "Rods": "Rods",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata``.

        :param fn: Path of the h5ad file. If None, the default location below ``self.path`` is used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad")
        # read_h5ad is the reader behind the deprecated anndata.read alias.
        self.adata = anndata.read_h5ad(fn)
class Dataset(DatasetBase):
    """
    Loader for the 10x part (E-MTAB-6701) of the human placenta data set with DOI 10.1038/s41586-018-0698-6.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        """
        :param path: Root data directory, used by ``_load`` to build the default file paths.
        :param meta_path: Forwarded to the base class.
        :param cache_path: Forwarded to the base class.
        :param kwargs: Forwarded to the base class.
        """
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        # NOTE(review): unlike the module name, this ID carries no "_001" counter - confirm this is intended.
        self.id = "human_placenta_2018_10x_ventotormo_10.1038/s41586-018-0698-6"

        self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.1.zip"
        self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip"

        self.author = "Teichmann"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "placenta,decidua,blood"  # ToDo: move this into .obs_key_organ?
        self.organism = "human"
        self.doi = "10.1038/s41586-018-0698-6"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        # Both var columns are created in _load from the "<symbol>_<ensembl id>" gene names.
        self.var_symbol_col = "names"
        self.var_ensembl_col = "ensembl"

        self.obs_key_cellontology_original = "annotation"
        # ToDo: further anatomical information for subtissue in "location"

        self.class_maps = {
            "0": {
                "DC1": "Dendritic Cells 1",
                "DC2": "Dendritic Cells 2",
                "EVT": "Extravillous Trophoblasts",
                "Endo (f)": "Endothelial Cells f",
                "Endo (m)": "Endothelial Cells m",
                "Endo L": "Endothelial Cells L",
                "Epi1": "Epithelial Glandular Cells 1",
                "Epi2": "Epithelial Glandular Cells 2",
                "Granulocytes": "Granulocytes",
                "HB": "Hofbauer Cells",
                "ILC3": "ILC3",
                "MO": "Monocyte",
                "NK CD16+": "NK Cells CD16+",
                "NK CD16-": "NK Cells CD16-",
                "Plasma": "B cell (Plasmocyte)",
                "SCT": "Syncytiotrophoblasts",
                "Tcells": "T cell",
                "VCT": "Villous Cytotrophoblasts",
                "dM1": "Decidual Macrophages 1",
                "dM2": "Decidual Macrophages 2",
                "dM3": "Decidual Macrophages 3",
                "dNK p": "Decidual NK Cells p",
                "dNK1": "Decidual NK Cells 1",
                "dNK2": "Decidual NK Cells 2",
                "dNK3": "Decidual NK Cells 3",
                "dP1": "Perivascular Cells 1",
                "dP2": "Perivascular Cells 2",
                "dS1": "Decidual Stromal Cells 1",
                "dS2": "Decidual Stromal Cells 2",
                "dS3": "Decidual Stromal Cells 3",
                "fFB1": "Fibroblasts 1",
                "fFB2": "Fibroblasts 2",
            },
        }

    def _load(self, fn=None):
        """
        Read the expression table, copy all metadata columns into ``obs`` and split the gene names.

        :param fn: Sequence of two paths (expression table, metadata table). If None, the default
            locations below ``self.path`` are used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        :raises KeyError: If a cell of the expression table has no row in the metadata table.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = [
                os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.1.zip"),
                os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"),
            ]
        self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T)
        df = pd.read_csv(fn[1], sep="\t")
        # Align the metadata to the cells once instead of one row lookup per cell and column.
        meta = df.loc[self.adata.obs.index]
        for col in meta.columns:
            self.adata.obs[col] = meta[col].tolist()

        # Gene names are split at "_": first part -> "names", second part -> "ensembl".
        self.adata.var["ensembl"] = [i.split("_")[1] for i in self.adata.var.index]
        self.adata.var["names"] = [i.split("_")[0] for i in self.adata.var.index]
        self.adata.var = self.adata.var.reset_index().reset_index().drop("index", axis=1)
        # NOTE(review): the var index has just been reset, so this filter on the index presumably no
        # longer matches any of the listed names; it may have been meant for "names" - confirm.
        self.adata = self.adata[:, ~self.adata.var.index.isin(
            ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy()
def _load(self, fn=None):
    """
    Read the expression table, copy all metadata columns into ``obs`` and split the gene names.

    :param fn: Sequence of two paths (expression table, metadata table). If None, the default
        locations below ``self.path`` are used.
    :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
    :raises KeyError: If a cell of the expression table has no row in the metadata table.
    """
    if fn is None:
        if self.path is None:
            # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
            raise ValueError("provide either fn in load or path in constructor")
        fn = [
            os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"),
            os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"),
        ]
    self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T)
    df = pd.read_csv(fn[1], sep="\t")
    # Align the metadata to the cells once instead of one row lookup per cell and column.
    meta = df.loc[self.adata.obs.index]
    for col in meta.columns:
        self.adata.obs[col] = meta[col].tolist()

    # Gene names are split at "_": first part -> "names", second part -> "ensembl".
    self.adata.var["ensembl"] = [i.split("_")[1] for i in self.adata.var.index]
    self.adata.var["names"] = [i.split("_")[0] for i in self.adata.var.index]
    self.adata.var = self.adata.var.reset_index().reset_index().drop("index", axis=1)
    # NOTE(review): the var index has just been reset, so this filter on the index presumably no
    # longer matches any of the listed names; it may have been meant for "names" - confirm.
    self.adata = self.adata[:, ~self.adata.var.index.isin(
        ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy()
= "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "1": "NK, NKT and T cells", + "2": "Kupffer Cell", + "3": "NK, NKT and T cells", + "4": "Cholangiocytes", + "5": "NK, NKT and T cells", + "6": "Kupffer Cell", + "7": "Cholangiocytes", + "8": "B Cell", + "9": "Liver sinusoidal endothelial cells", + "10": "Macrovascular endothelial cells", + "11": "Hepatocyte", + "12": "NK, NKT and T cells", + "13": "Liver sinusoidal endothelial cells", + "14": "Hepatocyte", + "15": "Other endothelial cells", + "16": "Unknown", + "17": "Hepatocyte", + "18": "NK, NKT and T cells", + "19": "Unknown", + "20": "Liver sinusoidal endothelial cells", + "21": "Macrovascular endothelial cells", + "22": "B Cell", + "23": "Kupffer Cell", + "24": "Cholangiocytes", + "25": "Kupffer Cell", + "26": "Other endothelial cells", + "27": "Unknown", + "28": "NK, NKT and T cells", + "29": "Macrovascular endothelial cells", + "30": "Hepatocyte", + "31": "Kupffer Cell", + "32": "Liver sinusoidal endothelial cells", + "33": "Hepatic stellate cells", + "34": "B Cell", + "35": "Other endothelial cells", + "36": "Unknown", + "37": "Unknown", + "38": "B Cell", + "39": "Cholangiocytes" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") + ] + self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + celltype_df = pd.read_csv(fn[1], sep=" ") + self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() + self.adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in self.adata.obs.index] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ 
class Dataset(DatasetBase):
    """
    Loader for the human liver data set with DOI 10.1038/s41586-019-1631-3.

    This dataloader requires manual preprocessing of the Rdata file that can be obtained from the link in the
    `download` attribute of this class. The preprocessing code below uses the rpy2 and anndata2ri python
    packages to convert the R object to anndata (pip install anndata2ri), run it in a jupyter notebook:

    ## Notebook Cell 1
    import anndata2ri
    anndata2ri.activate()
    %load_ext rpy2.ipython

    ## Notebook Cell 2
    %%R -o sce
    library(Seurat)
    load("tissue.rdata")
    new_obj = CreateSeuratObject(counts = tissue@raw.data)
    new_obj@meta.data = tissue@meta.data
    sce <- as.SingleCellExperiment(new_obj)

    ## Notebook cell 3
    sce.write("ramachandran.h5ad")

    :param path: Root data directory, used by ``_load`` to build the default file path.
    :param meta_path: Forwarded to the base class.
    :param cache_path: Forwarded to the base class.
    :param kwargs: Forwarded to the base class.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3"

        self.download = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata"
        self.download_meta = None

        self.author = "Henderson"
        self.doi = "10.1038/s41586-019-1631-3"
        self.normalization = "raw"
        self.organ = "liver"
        self.organism = "human"
        self.protocol = "10x"
        self.year = 2019

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "annotation_lineage"
        # The health state is taken per cell from obs["condition"]; "Uninjured" is the healthy value.
        self.obs_key_state_exact = "condition"
        self.obs_key_healthy = self.obs_key_state_exact
        self.healthy_state_healthy = "Uninjured"

        # Maps the labels found in obs["annotation_lineage"] to the label set used by this package.
        self.class_maps = {
            "0": {
                "MPs": "MP",
                "Tcells": "Tcells",
                "ILCs": "ILC",
                "Endothelia": "Endothelia",
                "Bcells": "Bcells",
                "pDCs": "pDCs",
                "Plasma Bcells": "Plasma B cell",
                "Mast cells": "Mast cell",
                "Mesenchyme": "Mesenchyme",
                "Cholangiocytes": "Cholangiocytes",
                "Hepatocytes": "Hepatocytes",
                "Mesothelia": "Mesothelia",
            },
        }

    def _load(self, fn=None):
        """
        Read the manually converted h5ad file (see class docstring) into ``self.adata``.

        :param fn: Path of the h5ad file. If None, the default location below ``self.path`` is used.
        :raises ValueError: If neither ``fn`` nor ``self.path`` is available.
        """
        if fn is None:
            if self.path is None:
                # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
                raise ValueError("provide either fn in load or path in constructor")
            fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad")
        # read_h5ad is the reader behind the deprecated anndata.read alias.
        self.adata = anndata.read_h5ad(fn)
+ self.doi = "10.1038/s41586-019-1652-y" + self.healthy = True + self.normalization = "raw" + self.organ = "liver" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "cell.labels" + + self.class_maps = { + "0": { + "B cell": "Mature B cells", + "DC1": "Dendritic cell 1", + "DC2": "Dendritic cell 2", + "DC precursor": "Dendritic cell precursor", + "Early Erythroid": "Early Erythroid", + "Early lymphoid_T lymphocyte": "Early lymphoid T lymphocyte", + "Endothelial cell": "Endothelial cell", + "Fibroblast": "Fibroblast", + "HSC_MPP": "HSC MPP", + "Hepatocyte": "Hepatocyte", + "ILC precursor": "ILC precursor", + "Kupffer Cell": "Kupffer Cell", + "Late Erythroid": "Late Erythroid", + "MEMP": "MEMP", + "Mast cell": "Mast cell", + "Megakaryocyte": "Megakaryocyte", + "Mid Erythroid": "Mid Erythroid", + "Mono-Mac": "Mono Macrophage", + "Monocyte": "Monocyte", + "Monocyte precursor": "Monocyte precursor", + "NK": "NK cell", + "Neutrophil-myeloid progenitor": "Neutrophil myeloid progenitor", + "Pre pro B cell": "Pre pro B cell", + "VCAM1+ EI macrophage": "VCAM1pos EI macrophage", + "pDC precursor": "pDendritic cell precursor", + "pre-B cell": "pre B cell", + "pro-B cell": "pro B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") + self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py new file mode 100644 index 
000000000..75b926aa6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -0,0 +1,156 @@ +import anndata +import numpy as np +import os +import pandas as pd +import scipy.sparse +from typing import Union +import urllib.request +import zipfile + +from sfaira.data import DatasetBase + + +class Dataset_d10_1038_s41586_020_2157_4(DatasetBase): + """ + This is a dataloader template for human cell landscape data. + """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.download = "https://ndownloader.figshare.com/files/17727365" + self.download_meta = [ + "https://ndownloader.figshare.com/files/21758835", + "https://ndownloader.figshare.com/files/22447898", + ] + + self.author = "Guo" + self.doi = "10.1038/s41586-020-2157-4" + self.healthy = True + self.normalization = "raw" + self.organism = "human" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2020 + + self.obs_key_cellontology_original = "cell_ontology_class" + self.obs_key_dev_stage = "dev_stage" + self.obs_key_sex = "gender" + self.obs_key_age = "age" + + self.var_symbol_col = "index" + + def _download(self): + # download required files from human cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471 + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/17727365", + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad") + )) + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/21758835", + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx") + )) + + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/22447898", + os.path.join(self.path, "human", self._directory_formatted_doi,
"annotation_rmbatch_data_revised417.zip") + )) + # extract the downloaded zip archive + with zipfile.ZipFile( + os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417.zip"), + "r" + ) as zip_ref: + zip_ref.extractall(os.path.join(self.path, self._directory_formatted_doi)) + + def _load_generalized(self, fn, sample_id: str): + """ + Attempt to find file, cache entire HCL if file was not found. + + :param fn: + :return: + """ + adata = anndata.read(os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad")) + # convert to sparse matrix + adata.X = scipy.sparse.csr_matrix(adata.X).copy() + + # harmonise annotations + for col in ["batch", "tissue"]: + adata.obs[col] = adata.obs[col].astype("str") + adata.obs.index = adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", + "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) + adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] + + # load celltype labels and harmonise them + # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: + fig1_anno = pd.read_excel( + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + index_col="cellnames", + engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables + ) + fig1_anno.index = fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + + # check that the order of cells and cell labels is the same + assert np.all(fig1_anno.index == adata.obs.index) + + # add 
annotations to adata object and rename columns + adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) + adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", + "celltype_global"] + + # add sample-wise annotations to the full adata object + df = pd.DataFrame( + columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", + "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) + for f in os.listdir( + os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417") + ): + df1 = pd.read_csv( + os.path.join( + self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417", f + ), encoding="unicode_escape") + df = pd.concat([df, df1], sort=True) + df = df.set_index("Cell_id") + adata = adata[[i in df.index for i in adata.obs.index]].copy() + a_idx = adata.obs.index.copy() + adata.obs = pd.concat([adata.obs, df[["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"]]], axis=1) + assert np.all(a_idx == adata.obs.index) + + # remove mouse cells from the object # ToDo: add this back in as mouse data sets? 
+ adata = adata[adata.obs["Source"] != "MCA2.0"].copy() + + # tidy up the column names of the obs annotations + adata.obs.columns = ["sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", + "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", + "protocol", "source"] + + # create a tidy organ annotation which is then used in sfaira + adata.obs["organ"] = adata.obs["sub_tissue"] \ + .str.replace("Adult", "") \ + .str.replace("Fetal", "") \ + .str.replace("Neonatal", "") \ + .str.replace("Transverse", "") \ + .str.replace("Sigmoid", "") \ + .str.replace("Ascending", "") \ + .str.replace("Cord", "") \ + .str.replace("Peripheral", "") \ + .str.replace("CD34P", "") \ + .str.replace("Cerebellum", "Brain") \ + .str.replace("TemporalLobe", "Brain") \ + .str.replace("BoneMarrow", "Bone") \ + .str.replace("Spinal", "SpinalCord") \ + .str.replace("Intestine", "Stomach") \ + .str.replace("Eyes", "Eye") \ + .str.lower() + + self.adata = adata[adata.obs["sample"] == sample_id].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py new file mode 100644 index 000000000..1ca7d67ed --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "adipose" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="AdultAdipose_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py new file mode 100644 index 000000000..3bbf998fe --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="NeonatalAdrenalGland_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py new file mode 100644 index 000000000..f103be794 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + 
self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py new file mode 100644 index 000000000..28fa28b71 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py new file mode 100644 index 000000000..9250204e6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py new file mode 100644 index 000000000..6df0c6ba7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py new file mode 100644 index 000000000..6bdad262b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py new file mode 100644 index 000000000..e8fef6576 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "artery" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultArtery_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py new file mode 100644 index 000000000..60f46cc6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultBladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py new file mode 100644 index 000000000..80fce100f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultBladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py new file mode 100644 index 000000000..6a275fd56 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultGallbladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py new file mode 100644 index 000000000..f42cdfd46 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py new file mode 100644 index 000000000..f12385f4c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="PeripheralBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py new file mode 100644 index 000000000..2b5b470ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBlood_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py new file mode 100644 index 000000000..ad3e7090c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py new file mode 100644 index 000000000..9eb937bef --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBloodCD34P_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py new file mode 100644 index 000000000..015d311ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBloodCD34P_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py new file mode 100644 index 000000000..a3ada5b3f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py new file mode 100644 index 000000000..9f04b99d6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "bone" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="BoneMarrow_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py new file mode 100644 index 000000000..3bf057903 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "bone" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="BoneMarrow_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py new file mode 100644 index 000000000..373b2c325 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py new file mode 100644 index 000000000..f6b377c91 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py @@ -0,0 +1,54 @@ +from typing 
import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_5") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py new file mode 100644 index 000000000..78487bf6f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": 
"Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py new file mode 100644 index 000000000..bd4c0cfde --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", 
+ "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTemporalLobe_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py new file mode 100644 index 000000000..ea462000c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal 
Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py new file mode 100644 index 000000000..749d7d71f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell 
(Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultCerebellum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py new file mode 100644 index 000000000..8524c25e4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "calvaria" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalCalvaria_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py new file mode 100644 index 000000000..521bb924e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "cervix" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultCervix_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py new file mode 100644 index 000000000..560e297aa --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, 
+ **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "chorionicvillus" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="ChorionicVillus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py new file mode 100644 index 000000000..01ce9f3ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal 
progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAscendingColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py new file mode 100644 index 000000000..1082dfa60 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell 
(endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py new file mode 100644 index 000000000..099d79147 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal 
epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTransverseColon_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py new file mode 100644 index 000000000..30da95dfe --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic 
cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSigmoidColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py new file mode 100644 index 000000000..122bd7bf8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "duodenum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultDuodenum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py new file mode 100644 index 000000000..c50ae3fac --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "epityphlon" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEpityphlon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py new file mode 100644 index 000000000..2f948e5a0 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py @@ -0,0 +1,47 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Esophagus" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Basal cell": "Basal cell", + "Stratified epithelial cell": "Stratified epithelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + 
"Macrophage": "Macrophage", + "B cell": "B cell", + "T cell": "T cell", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Stromal cell": "Stromal cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "Neutrophil": "Neutrophil", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Fetal stromal cell": "Fetal stromal cell", + "CB CD34+": "CB CD34+", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Loop of Henle": "Loop of Henle", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEsophagus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py new file mode 100644 index 000000000..af43f661a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py @@ -0,0 +1,47 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Esophagus" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Basal cell": "Basal cell", + 
"Stratified epithelial cell": "Stratified epithelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Macrophage": "Macrophage", + "B cell": "B cell", + "T cell": "T cell", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Stromal cell": "Stromal cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "Neutrophil": "Neutrophil", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Fetal stromal cell": "Fetal stromal cell", + "CB CD34+": "CB CD34+", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Loop of Henle": "Loop of Henle", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEsophagus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py new file mode 100644 index 000000000..10dde6d24 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py @@ -0,0 +1,46 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Eye" + 
self.class_maps = { + "0": { + "Fetal neuron": "Fetal neuron", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Erythroid cell": "Erythroid cell", + "Primordial germ cell": "Primordial germ cell", + "Endothelial cell": "Endothelial cell", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fetal fibroblast": "Fibroblast", + "Fetal Neuron": "Fetal neuron", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Dendritic cell": "Dendritic cell", + "Fetal endocrine cell": "Fetal endocrine cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Basal cell": "Basal cell", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "Stratified epithelial cell": "Stratified epithelial cell", + "CB CD34+": "CB CD34_pos", + "hESC": "hESC" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalEyes_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py new file mode 100644 index 000000000..fec492e2e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "fallopiantube" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultFallopiantube_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py new file mode 100644 index 000000000..1fcb34991 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "femalegonad" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py new file mode 100644 index 000000000..257d7750a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "femalegonad" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py new file mode 100644 index 000000000..b3674ca67 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "gallbladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultGallbladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py new file mode 100644 index 000000000..4b25db497 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py new file mode 100644 index 000000000..839528da7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py new file mode 100644 index 000000000..a4cc0fdfb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py new file mode 100644 index 000000000..7439b9fec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py new file mode 100644 index 000000000..625583aa7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py @@ -0,0 +1,19 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "hesc" + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="HESC_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py new file mode 100644 index 000000000..66d4209fc --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py @@ -0,0 +1,49 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "ileum" + self.class_maps = { + "0": { + "B cell": "B cells", + "B cell (Plasmocyte)": "Plasma Cells", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte": "Enterocytes", + "Enterocyte progenitor": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "Fetal Neuron": "Fetal neuron", + "Fetal enterocyte": "Enterocytes", + "Fetal epithelial progenitor": "Progenitors", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblasts", + "Hepatocyte/Endodermal cell": "Hepatocyte/Endodermal cell", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cells", + "Monocyte": "Monocyte", + "Neutrophil (RPS high)": 
"Neutrophil (RPS high)", + "Proliferating T cell": "T cells", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cells", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultIleum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py new file mode 100644 index 000000000..9db8f62a8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "jejunum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultJejunum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py new file mode 100644 index 000000000..77b2e117e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + 
"Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py new file mode 100644 index 000000000..42b21e827 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid 
progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py new file mode 100644 index 000000000..2811fe77f --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated 
cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py new file mode 100644 index 000000000..07cad1336 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + 
"Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric 
bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py new file mode 100644 index 000000000..751e9470f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial 
progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py new file mode 100644 index 000000000..a1b0a195e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = 
None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": 
"Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py new file mode 100644 index 000000000..9793a4b2e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell 
(intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py new file mode 100644 index 
000000000..1318b3dba --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="AdultLiver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py new file mode 100644 index 000000000..59b07abf1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas 
exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLiver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py new file mode 100644 index 000000000..1701446f1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine 
cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLiver_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py new file mode 100644 index 000000000..119f16030 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": 
"Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Liver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py new file mode 100644 index 000000000..94fd323db --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB 
CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Liver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py new file mode 100644 index 000000000..2487b789a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": 
"1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py new file mode 100644 index 000000000..d1d6a73c4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": 
"1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py new file mode 100644 index 000000000..9dc30ed7e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS 
high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py new file mode 100644 index 000000000..6a85c3db0 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric 
endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py new file mode 100644 index 000000000..0083c8e5b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + 
"Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py new file mode 100644 index 000000000..9acf2ac6e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "malegonad" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal acinar cell": "Fetal acinar cell", + "Fetal chondrocyte": "Fetal chondrocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", + "Loop of Henle": "Loop of Henle", + "Macrophage": "Macrophages", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + 
"T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py new file mode 100644 index 000000000..8964fa222 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "malegonad" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal acinar cell": "Fetal acinar cell", + "Fetal chondrocyte": "Fetal chondrocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", + "Loop of Henle": "Loop of Henle", + "Macrophage": "Macrophages", + "Monocyte": 
"Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py new file mode 100644 index 000000000..1ac3d1e92 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "muscle" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py new file mode 100644 index 000000000..9af4d2f15 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + 
self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "muscle" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py new file mode 100644 index 000000000..82e0aa456 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py new file mode 100644 index 000000000..c903a8e78 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, 
+ path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py new file mode 100644 index 000000000..37652c8ca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py new file mode 100644 index 000000000..a5b273cbd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, 
+ path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal 
cell": "Stromal cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py new file mode 100644 index 000000000..c8eb43976 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal 
muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py new file mode 100644 index 000000000..aa5ba9d05 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", 
+ "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py new file mode 100644 index 000000000..02662bd9f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py @@ -0,0 +1,61 @@ +from typing import Union +from .base 
import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal 
tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py new file mode 100644 index 000000000..537345671 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Placenta" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Macrophage": "Macrophage", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Fetal stromal cell": "Fetal stromal cell", + "Stromal cell": "Stromal cell", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "T cell": "T cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Intermediated cell": "Intermediated cell", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Stratified epithelial cell": "Stratified epithelial cell", + "Fetal neuron": "Fetal neuron", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "hESC": "hESC", + "Basal 
cell": "Basal cell", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "M2 Macrophage": "M2 Macrophage", + "Myeloid cell": "Myeloid cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Placenta_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py new file mode 100644 index 000000000..efcd5c949 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "pleura" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPleura_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py new file mode 100644 index 000000000..1a6bef219 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py @@ -0,0 +1,43 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "prostate" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Basal cell": "Basal cell", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "Fasciculata cell": "Fasciculata cell", + "Fetal enterocyte": "Fetal enterocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Primordial germ cell": "Primordial germ cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultProstate_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py new file mode 100644 index 000000000..25af7abb2 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py @@ -0,0 +1,38 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "rectum" + self.class_maps = { + "0": { + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Dendritic cell": "Dendritic cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Enterocyte": "Enterocyte", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Fetal stromal cell": "Fetal stromal cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultRectum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py new file mode 100644 index 000000000..1df96a84c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "rib" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalRib_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py new file mode 100644 index 000000000..d37bf1bca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "rib" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalRib_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py new file mode 100644 index 000000000..26d732174 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py @@ -0,0 +1,52 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "skin" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Kidney intercalated cell": "Kidney intercalated cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSkin_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py new file mode 100644 index 000000000..591ed500f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py @@ -0,0 +1,52 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + 
path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "skin" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Kidney intercalated cell": "Kidney intercalated cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSkin_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py new file mode 100644 index 000000000..935cad23b --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "spinalcord" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSpinalCord_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py new file mode 100644 index 000000000..f5dad107a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py @@ -0,0 +1,44 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Spleen" + self.class_maps = { + "0": { + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Erythroid cell": 
"Erythroid cell", + "Monocyte": "Monocyte", + "Endothelial cell": "Endothelial cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Proliferating T cell": "Proliferating T cell", + "Fibroblast": "Fibroblast", + "Stromal cell": "Stromal cell", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Mast cell": "Mast cell", + "Smooth muscle cell": "Smooth muscle cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSpleenParenchyma_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py new file mode 100644 index 000000000..fd40ef79a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py @@ -0,0 +1,44 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Spleen" + self.class_maps = { + "0": { + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Erythroid cell": "Erythroid cell", + "Monocyte": "Monocyte", + "Endothelial cell": "Endothelial cell", + 
"Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Proliferating T cell": "Proliferating T cell", + "Fibroblast": "Fibroblast", + "Stromal cell": "Stromal cell", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Mast cell": "Mast cell", + "Smooth muscle cell": "Smooth muscle cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSpleen_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py new file mode 100644 index 000000000..997a1795d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py new file mode 100644 index 000000000..c32a24ee2 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py new file mode 100644 index 000000000..d93ed24d7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py new file mode 100644 index 000000000..9707559a3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py new file mode 100644 index 000000000..5319b9ce6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py new file mode 100644 index 000000000..76b06f3dd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py new file mode 100644 index 000000000..9de6d6c5a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py new file mode 100644 index 000000000..115dd11ea --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py new file mode 100644 index 000000000..d26759d0d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py new file mode 100644 index 000000000..58741fd2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py new file mode 100644 index 000000000..9ec801179 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py @@ -0,0 +1,36 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "thymus" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Erythroid cell": "Ery", + "Erythroid progenitor cell (RP high)": "Ery", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Macrophage": "Mac", + "Monocyte": "Mono", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Proliferating T cell": "Proliferating T cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="FetalThymus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py new file mode 100644 index 000000000..9dd3bf713 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py @@ -0,0 +1,36 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "thymus" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Erythroid cell": "Ery", + "Erythroid progenitor cell (RP high)": "Ery", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Macrophage": "Mac", + "Monocyte": "Mono", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Proliferating T cell": "Proliferating T cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalThymus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py new file mode 100644 index 000000000..74b94bff4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "thyroid" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultThyroid_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py new file mode 100644 index 000000000..2e932292b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "thyroid" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultThyroid_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py new file mode 100644 index 000000000..368c39418 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "trachea" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTrachea_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py new file mode 100644 index 000000000..0ddad9999 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "ureter" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultUreter_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py new file mode 100644 index 000000000..82efa82fb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "uterus" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultUterus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py new file mode 100644 index 000000000..ed648d9db --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -0,0 +1,196 @@ +import anndata +import os +from typing import Union +import scipy.sparse +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a + free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python + using the synapse client, installable via `pip install synapseclient`: + + import synapseclient + import shutil + syn = synapseclient.Synapse() + syn.login("synapse_username","password") + syn21625095 = syn.get(entity="syn21625095") + shutil.move(syn21625095.path, "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" + + self.download = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_meta = None + + self.author = "Krasnow" + self.doi = "10.1038/s41586-020-2922-4" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "Adventitial Fibroblast_P1": "Fibroblasts", + "Adventitial Fibroblast_P2": "Fibroblasts", + "Adventitial Fibroblast_P3": "Fibroblasts", + "Airway Smooth Muscle_P1": "Airway smooth muscle", + "Airway Smooth Muscle_P2": "Airway smooth muscle", + "Airway Smooth Muscle_P3": "Airway smooth muscle", + "Alveolar Epithelial Type 1_P1": "AT1", + "Alveolar Epithelial Type 1_P2": "AT1", + "Alveolar Epithelial Type 1_P3": "AT1", + "Alveolar Epithelial Type 2_P1": "AT2", + "Alveolar Epithelial Type 2_P2": "AT2", + "Alveolar Epithelial Type 2_P3": "AT2", + "Alveolar Fibroblast_P1": "Fibroblasts", + "Alveolar Fibroblast_P2": "Fibroblasts", + "Alveolar Fibroblast_P3": "Fibroblasts", + "Artery_P1": "Arterial", + "Artery_P2": "Arterial", + "Artery_P3": "Arterial", + "B_P1": "B cell lineage", + "B_P2": "B cell lineage", + "B_P3": "B 
cell lineage", + "Basal_P1": "Basal", + "Basal_P2": "Basal", + "Basal_P3": "Basal", + "Basophil/Mast 1_P1": "Mast cells", + "Basophil/Mast 1_P2": "Mast cells", + "Basophil/Mast 1_P3": "Mast cells", + "Basophil/Mast 2_P3": "Mast cells", + "Bronchial Vessel 1_P1": "Bronchial Vessel 1", + "Bronchial Vessel 1_P3": "Bronchial Vessel 1", + "Bronchial Vessel 2_P1": "Bronchial Vessel 2", + "Bronchial Vessel 2_P3": "Bronchial Vessel 2", + "CD4+ Memory/Effector T_P1": "T cell lineage", + "CD4+ Memory/Effector T_P2": "T cell lineage", + "CD4+ Memory/Effector T_P3": "T cell lineage", + "CD4+ Naive T_P1": "T cell lineage", + "CD4+ Naive T_P2": "T cell lineage", + "CD4+ Naive T_P3": "T cell lineage", + "CD8+ Memory/Effector T_P1": "T cell lineage", + "CD8+ Memory/Effector T_P2": "T cell lineage", + "CD8+ Memory/Effector T_P3": "T cell lineage", + "CD8+ Naive T_P1": "T cell lineage", + "CD8+ Naive T_P2": "T cell lineage", + "CD8+ Naive T_P3": "T cell lineage", + "Capillary Aerocyte_P1": "Capillary", + "Capillary Aerocyte_P2": "Capillary", + "Capillary Aerocyte_P3": "Capillary", + "Capillary Intermediate 1_P2": "Capillary Intermediate 1", + "Capillary Intermediate 2_P2": "Capillary Intermediate 2", + "Capillary_P1": "Capillary", + "Capillary_P2": "Capillary", + "Capillary_P3": "Capillary", + "Ciliated_P1": "Multiciliated lineage", + "Ciliated_P2": "Multiciliated lineage", + "Ciliated_P3": "Multiciliated lineage", + "Classical Monocyte_P1": "Monocytes", + "Classical Monocyte_P2": "Monocytes", + "Classical Monocyte_P3": "Monocytes", + "Club_P1": "Secretory", + "Club_P2": "Secretory", + "Club_P3": "Secretory", + "Differentiating Basal_P1": "Basal", + "Differentiating Basal_P3": "Basal", + "EREG+ Dendritic_P1": "Macrophages", + "EREG+ Dendritic_P2": "Macrophages", + "Fibromyocyte_P3": "Fibromyocyte", + "Goblet_P3": "Secretory", + "IGSF21+ Dendritic_P1": "Macrophages", + "IGSF21+ Dendritic_P2": "Macrophages", + "IGSF21+ Dendritic_P3": "Macrophages", + "Intermediate Monocyte_P2": 
"Monocytes", + "Ionocyte_P3": "Rare", + "Lipofibroblast_P1": "Fibroblasts", + "Lymphatic_P1": "Lymphatic EC", + "Lymphatic_P2": "Lymphatic EC", + "Lymphatic_P3": "Lymphatic EC", + "Macrophage_P1": "Macrophages", + "Macrophage_P2": "Macrophages", + "Macrophage_P3": "Macrophages", + "Mesothelial_P1": "Mesothelium", + "Mucous_P2": "Submucosal Secretory", + "Mucous_P3": "Submucosal Secretory", + "Myeloid Dendritic Type 1_P1": "Dendritic cells", + "Myeloid Dendritic Type 1_P2": "Dendritic cells", + "Myeloid Dendritic Type 1_P3": "Dendritic cells", + "Myeloid Dendritic Type 2_P1": "Dendritic cells", + "Myeloid Dendritic Type 2_P2": "Dendritic cells", + "Myeloid Dendritic Type 2_P3": "Dendritic cells", + "Myofibroblast_P1": "Myofibroblasts", + "Myofibroblast_P2": "Myofibroblasts", + "Myofibroblast_P3": "Myofibroblasts", + "Natural Killer T_P2": "T cell lineage", + "Natural Killer T_P3": "T cell lineage", + "Natural Killer_P1": "Innate lymphoid cells", + "Natural Killer_P2": "Innate lymphoid cells", + "Natural Killer_P3": "Innate lymphoid cells", + "Neuroendocrine_P3": "Rare", + "Nonclassical Monocyte_P1": "Monocytes", + "Nonclassical Monocyte_P2": "Monocytes", + "Nonclassical Monocyte_P3": "Monocytes", + "OLR1+ Classical Monocyte_P2": "Monocytes", + "Pericyte_P1": "Fibroblasts", + "Pericyte_P2": "Fibroblasts", + "Pericyte_P3": "Fibroblasts", + "Plasma_P1": "B cell lineage", + "Plasma_P3": "B cell lineage", + "Plasmacytoid Dendritic_P1": "Dendritic cells", + "Plasmacytoid Dendritic_P2": "Dendritic cells", + "Plasmacytoid Dendritic_P3": "Dendritic cells", + "Platelet/Megakaryocyte_P1": "Megakaryocytes", + "Platelet/Megakaryocyte_P3": "Megakaryocytes", + "Proliferating Basal_P1": "Basal", + "Proliferating Basal_P3": "Basal", + "Proliferating Macrophage_P1": "Macrophages", + "Proliferating Macrophage_P2": "Macrophages", + "Proliferating Macrophage_P3": "Macrophages", + "Proliferating NK/T_P2": "Innate lymphoid cells", + "Proliferating NK/T_P3": "Innate lymphoid cells", + 
"Proximal Basal_P3": "Basal", + "Proximal Ciliated_P3": "Multiciliated lineage", + "Serous_P3": "Submucosal Secretory", + "Signaling Alveolar Epithelial Type 2_P3": "AT2", + "TREM2+ Dendritic_P1": "Macrophages", + "TREM2+ Dendritic_P3": "Macrophages", + "Vascular Smooth Muscle_P2": "2_Smooth Muscle", + "Vascular Smooth Muscle_P3": "2_Smooth Muscle", + "Vein_P1": "Venous", + "Vein_P2": "Venous", + "Vein_P3": "Venous", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + self.adata = anndata.read(fn) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None])) \ + .multiply(1 / 10000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py new file mode 100644 index 000000000..1ebf48fb2 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py @@ -0,0 +1,162 @@ +import anndata +import os +from typing import Union +import scipy.sparse +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a + free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python + using the synapse client, installable via `pip install synapseclient`: + + import synapseclient + import shutil + syn = synapseclient.Synapse() + syn.login("synapse_username","password") + syn21625142 = syn.get(entity="syn21625142") + shutil.move(syn21625142.path, "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" + + self.download = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_meta = None + + self.author = "Krasnow" + self.doi = "10.1038/s41586-020-2922-4" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "smartseq2" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "Adventitial Fibroblast_P1": "Fibroblasts", + "Adventitial Fibroblast_P2": "Fibroblasts", + "Adventitial Fibroblast_P3": "Fibroblasts", + "Airway Smooth Muscle_P1": "Airway smooth muscle", + "Airway Smooth Muscle_P2": "Airway smooth muscle", + "Airway Smooth Muscle_P3": "Airway smooth muscle", + "Alveolar Epithelial Type 1_P1": "AT1", + "Alveolar Epithelial Type 1_P2": "AT1", + "Alveolar Epithelial Type 1_P3": "AT1", + "Alveolar Epithelial Type 2_P1": "AT2", + "Alveolar Epithelial Type 2_P2": "AT2", + "Alveolar Epithelial Type 2_P3": "AT2", + "Alveolar Fibroblast_P1": "Fibroblasts", + "Alveolar Fibroblast_P2": "Fibroblasts", + "Alveolar Fibroblast_P3": "Fibroblasts", + "Artery_P1": "Arterial", + "Artery_P2": "Arterial", + "Artery_P3": "Arterial", + "B_P1": "B cell lineage", + "B_P2": "B cell lineage", + 
"B_P3": "B cell lineage", + "Basal_P1": "Basal", + "Basal_P2": "Basal", + "Basal_P3": "Basal", + "Basophil/Mast 1_P1": "Mast cells", + "Basophil/Mast 1_P2": "Mast cells", + "Basophil/Mast 1_P3": "Mast cells", + "Bronchial Vessel 1_P1": "Bronchial Vessel 1", + "CD4+ Memory/Effector T_P1": "T cell lineage", + "CD4+ Naive T_P1": "T cell lineage", + "CD4+ Naive T_P2": "T cell lineage", + "CD8+ Memory/Effector T_P1": "T cell lineage", + "CD8+ Naive T_P1": "T cell lineage", + "CD8+ Naive T_P2": "T cell lineage", + "Capillary Aerocyte_P1": "Capillary", + "Capillary Aerocyte_P2": "Capillary", + "Capillary Aerocyte_P3": "Capillary", + "Capillary Intermediate 1_P2": "Capillary Intermediate 1", + "Capillary_P1": "Capillary", + "Capillary_P2": "Capillary", + "Capillary_P3": "Capillary", + "Ciliated_P1": "Multiciliated lineage", + "Ciliated_P2": "Multiciliated lineage", + "Ciliated_P3": "Multiciliated lineage", + "Classical Monocyte_P1": "Monocytes", + "Club_P1": "Secretory", + "Club_P2": "Secretory", + "Club_P3": "Secretory", + "Dendritic_P1": "Dendritic cells", + "Differentiating Basal_P3": "Basal", + "Fibromyocyte_P3": "Fibromyocyte", + "Goblet_P1": "Secretory", + "Goblet_P2": "Secretory", + "Goblet_P3": "Secretory", + "IGSF21+ Dendritic_P2": "Macrophages", + "IGSF21+ Dendritic_P3": "Macrophages", + "Intermediate Monocyte_P2": "Monocytes", + "Intermediate Monocyte_P3": "Monocytes", + "Ionocyte_P3": "Rare", + "Lipofibroblast_P1": "Fibroblasts", + "Lymphatic_P1": "Lymphatic EC", + "Lymphatic_P2": "Lymphatic EC", + "Lymphatic_P3": "Lymphatic EC", + "Macrophage_P2": "Macrophages", + "Macrophage_P3": "Macrophages", + "Myeloid Dendritic Type 2_P3": "Dendritic cells", + "Myofibroblast_P2": "Myofibroblasts", + "Myofibroblast_P3": "Myofibroblasts", + "Natural Killer T_P2": "T cell lineage", + "Natural Killer T_P3": "T cell lineage", + "Natural Killer_P1": "Innate lymphoid cells", + "Natural Killer_P2": "Innate lymphoid cells", + "Natural Killer_P3": "Innate lymphoid cells", + 
"Neuroendocrine_P1": "Rare", + "Neuroendocrine_P3": "Rare", + "Neutrophil_P1": "Monocytes", + "Neutrophil_P2": "Monocytes", + "Neutrophil_P3": "Monocytes", + "Nonclassical Monocyte_P1": "Monocytes", + "Nonclassical Monocyte_P2": "Monocytes", + "Pericyte_P1": "Fibroblasts", + "Pericyte_P2": "Fibroblasts", + "Pericyte_P3": "Fibroblasts", + "Plasma_P3": "B cell lineage", + "Plasmacytoid Dendritic_P1": "Dendritic cells", + "Plasmacytoid Dendritic_P2": "Dendritic cells", + "Plasmacytoid Dendritic_P3": "Dendritic cells", + "Proliferating NK/T_P2": "Innate lymphoid cells", + "Proliferating NK/T_P3": "Innate lymphoid cells", + "Signaling Alveolar Epithelial Type 2_P1": "AT2", + "Signaling Alveolar Epithelial Type 2_P3": "AT2", + "Vascular Smooth Muscle_P1": "2_Smooth Muscle", + "Vascular Smooth Muscle_P2": "2_Smooth Muscle", + "Vascular Smooth Muscle_P3": "2_Smooth Muscle", + "Vein_P2": "Venous", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + self.adata = anndata.read(fn) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nReads"].values[:, None])) \ + .multiply(1 / 1000000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py new file mode 100644 index 000000000..681dc6abd --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -0,0 +1,76 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" + + self.download = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41590-020-0602-z" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.obs_key_cellontology_original = "cell_type" + + self.class_maps = { + "0": { + "Activated CD4 T": "Activated CD4 T", + "B cell IgA Plasma": "B cell IgA Plasma", + "B cell IgG Plasma": "B cell IgG Plasma", + "B cell cycling": "B cell cycling", + "B cell memory": "B cell memory", + "CD8 T": "CD8 T", + "Follicular B cell": "Follicular", + "ILC": "ILC", + "LYVE1 Macrophage": "LYVE1 Macrophage", + "Lymphoid DC": "Lymphoid DC", + "Macrophage": "Macrophage", + "Mast": "Mast cell", + "Monocyte": "Monocyte", + "NK": "NK", + "Tcm": "Tcm", + "Tfh": "Tfh", + "Th1": "Th1", + "Th17": "Th17", + "Treg": "Treg", + "cDC1": "DC1", + "cDC2": "DC2", + "cycling DCs": "cycling DCs", + "cycling gd T": "cycling gd T", + "gd T": "gd T", + "pDC": "pDC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py new file mode 100644 index 000000000..a7df368b3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py @@ -0,0 +1,67 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" + + self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.organ = "lung" # ToDo: "alveoli, parenchyma" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + self.normalization = "norm" + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Ciliated 2": "Multiciliated lineage", + "Luminal_Macrophages": "Macrophages", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "Endothelial": "1_Endothelial", + 
"Lymphatic": "Lymphatic EC", + "Ciliated 1": "Multiciliated lineage", + "Smooth muscle": "2_Smooth Muscle", + "Type_1_alveolar": "AT1", + "Neutrophils": "Monocytes", + "Club": "Secretory", + "Basal 2": "Basal", + "B cells": "B cell lineage", + "T and NK": "2_Lymphoid", + "Mesothelium": "Mesothelium", + "Mast cells": "Mast cells", + "Fibroblasts": "2_Fibroblast lineage", + "Type 2 alveolar": "AT2", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py new file mode 100644 index 000000000..54985ca9b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py @@ -0,0 +1,67 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" + + self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Bronchi_anonymised.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.normalization = "norm" + self.organ = "lung" # ToDo "bronchi" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = 
"CellType" + + self.class_maps = { + "0": { + "Ciliated 1": "Multiciliated lineage", + "Club": "Secretory", + "Ciliated 2": "Multiciliated lineage", + "Ionocytes": "Rare", + "Basal 2": "Basal", + "Goblet_1": "Secretory", + "Goblet 2": "Secretory", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "B cells": "B cell lineage", + "Luminal_Macrophages": "Macrophages", + "Neutrophils": "Monocytes", + "Endothelial": "1_Endothelial", + "Smooth muscle": "2_Smooth Muscle", + "T and NK": "2_Lymphoid", + "Fibroblasts": "2_Fibroblast lineage", + "Lymphatic": "Lymphatic EC", + "Mast cells": "Mast cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py new file mode 100644 index 000000000..3d6542451 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py @@ -0,0 +1,65 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" + self.download_meta = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" # ToDo: "parenchymal lung and distal airway specimens" + self.organism = "human" + self.protocol = "dropseq" + self.state_exact = "uninvolved areas of tumour resection material" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Fibroblast": "Fibroblasts", + "Type 2": "AT2", + "B cell": "B cell lineage", + "Macrophages": "Macrophages", + "NK cell": "Innate lymphoid cells", + "T cell": "T cell lineage", + "Ciliated": "Multiciliated lineage", + "Lymphatic": "Lymphatic EC", + "Type 1": "AT1", + "Transformed epithelium": "1_Epithelial", + "Secretory": "Secretory", + "Endothelium": "1_Endothelial", + "Mast cell": "Mast cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), + ] + self.adata = anndata.read_csv(fn[0]).T + self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py new file mode 100644 index 000000000..4ad06f4ec --- /dev/null +++ 
class Dataset(DatasetBase):
    """
    Loader for data set mouse_brain_2019_10x_hove_001 (DOI 10.1038/s41593-019-0393-4).

    The data consist of a matrix market count matrix plus barcode, gene and cell annotation tables.
    _load() assembles them into one AnnData object that only contains annotated cells.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4"

        self.download = \
            "www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip"
        self.download_meta = \
            "www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv"

        self.author = "Movahedi"
        self.doi = "10.1038/s41593-019-0393-4"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "brain"
        self.organism = "mouse"
        # Was "microwell", which contradicted the "10x" encoded in self.id above.
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # These names have to match the columns assigned to the gene table in _load().
        self.var_ensembl_col = "ensembl"
        self.var_symbol_col = "names"

        self.obs_key_cellontology_class = self._ADATA_IDS_SFAIRA.cell_ontology_class
        self.obs_key_cellontology_id = self._ADATA_IDS_SFAIRA.cell_ontology_id
        self.obs_key_cellontology_original = self._ADATA_IDS_SFAIRA.cell_ontology_class

        self.class_maps = {
            "0": {
                "Microglia": "microglial cell",
                "T/NKT cells": "CD8-positive, alpha-beta T cell",
                "Monocytes": "monocyte"
            },
        }

    def _load(self, fn=None):
        """
        Read count matrix, barcodes, genes and cell annotation and combine them in self.adata.

        Cells without an entry in the annotation table are dropped. The annotation table is reordered to
        the order of the cells in the count matrix.

        :param fn: Path of matrix.mtx. If None, the default location below self.path is used. The files
            barcodes.tsv, genes.tsv and annot_fullAggr.csv are read from the directory that contains fn.
        """
        if fn is None:
            fn = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "matrix.mtx")
        # Resolve the companion files next to the matrix so that they are also defined if fn is supplied.
        data_dir = os.path.dirname(fn)
        fn_barcodes = os.path.join(data_dir, "barcodes.tsv")
        fn_var = os.path.join(data_dir, "genes.tsv")
        fn_meta = os.path.join(data_dir, "annot_fullAggr.csv")

        self.adata = anndata.read_mtx(fn)
        # The matrix is transposed after reading so that the barcode count matches the first axis below.
        self.adata = anndata.AnnData(self.adata.X.T)
        var = pandas.read_csv(fn_var, sep="\t", header=None)
        # "names" (not "name") so that self.var_symbol_col refers to an existing column.
        var.columns = ["ensembl", "names"]
        obs_names = pandas.read_csv(fn_barcodes, sep="\t", header=None)[0].values
        assert len(obs_names) == self.adata.shape[0]
        assert var.shape[0] == self.adata.shape[1]
        # fn_meta is already a full path, self.path must not be prepended a second time.
        obs = pandas.read_csv(fn_meta)

        # Match annotation to raw data.
        obs.index = obs["cell"].values
        annotated_cells = set(obs.index)  # set gives constant-time membership tests
        is_annotated = np.array([x in annotated_cells for x in obs_names], dtype=bool)
        self.adata = self.adata[is_annotated, :].copy()
        obs_names = obs_names[is_annotated]
        # Bring the annotation into matrix order; rows of cells that are not in the matrix are dropped.
        obs = obs.loc[obs_names, :]

        # Assign attributes
        self.adata.obs_names = obs_names
        self.adata.var = var
        self.adata.obs = obs
        assert np.all(self.adata.obs_names == self.adata.obs["cell"].values)
cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" + self.download_meta = None + + self.author = "Mo" + self.healthy = True + self.normalization = "raw" + self.organ = "kidney" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + self.doi = "10.1038/s41597-019-0351-8" + + self.var_symbol_col = "names" + self.var_ensembl_col = "ensembl" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") + adatas = [] + with tarfile.open(fn) as tar: + for member in tar.getmembers(): + if "_matrix.mtx.gz" in member.name: + name = "_".join(member.name.split("_")[:-1]) + with gzip.open(tar.extractfile(member), "rb") as mm: + X = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, + sep="\t", index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(name + "_features.tsv.gz"), compression="gzip", header=None, + sep="\t").iloc[:, :2] + var.columns = ["ensembl", "names"] + var.index = var["ensembl"].values + self.adata = anndata.AnnData(X=X, obs=obs, var=var) + self.adata.obs["sample"] = name + adatas.append(self.adata) + self.adata = adatas[0].concatenate(adatas[1:]) + del self.adata.obs["batch"] diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py 
class Dataset(DatasetBase):
    """
    Loader for data set human_eye_2019_10x_voigt_001 (DOI 10.1073/pnas.1914143116).

    The constructor only records meta data; the expression matrix is read from the h5ad file in _load().
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            **kwargs
        )
        self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116"

        # Data source: a single processed h5ad, there is no separate meta data file.
        self.download = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad"
        self.download_meta = None

        # Study-level meta data.
        self.author = "Mullins"
        self.doi = "10.1073/pnas.1914143116"
        self.healthy = True
        self.normalization = "norm"
        self.organ = "eye"  # ToDo: "retina"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # Columns of .var and .obs that carry gene symbols and the original cell type labels.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Original cell type label -> harmonised label, stored under map version "0".
        celltype_map_v0 = {
            "B-cell": "B-cell",
            "Endothelial": "Endothelial cell",
            "Fibroblast": "Fibroblast",
            "Macrophage": "Macrophage",
            "Mast-cell": "Mast-cell",
            "Melanocyte": "Melanocyte",
            "Pericyte": "Pericyte",
            "RPE": "Retinal pigment epithelium",
            "Schwann1": "Schwann1",
            "Schwann2": "Schwann2",
            "T/NK-cell": "T/NK-cell",
        }
        self.class_maps = {"0": celltype_map_v0}

    def _load(self, fn=None):
        """
        Read the h5ad file into self.adata and apply expm1 to the expression matrix.

        :param fn: Path of the h5ad file. If None, the default location below self.path is used.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        # NOTE(review): presumably the file stores log1p-transformed values and this inverts them - confirm.
        transformed = np.expm1(self.adata.X)
        self.adata.X = transformed
class Dataset(DatasetBase):
    """
    Loader for data set human_colon_2019_10x_wang_001 (DOI 10.1084/jem.20191130).

    The constructor only records meta data; the expression matrix is read and rescaled in _load().
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            **kwargs
        )
        self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130"

        # Data source: a single processed h5ad, there is no separate meta data file.
        self.download = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad"
        self.download_meta = None

        # Study-level meta data.
        self.author = "Chen"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "colon"
        self.organism = "human"
        self.doi = "10.1084/jem.20191130"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # Columns of .var and .obs that carry gene symbols and the original cell type labels.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Original cell type label -> harmonised label, stored under map version "0".
        celltype_map_v0 = {
            "Progenitor": "Enterocyte Progenitors",
            "Enterocyte": "Enterocytes",
            "Goblet": "Goblet cells",
            "TA": "TA",
            "Paneth-like": "Paneth cells",
            "Stem Cell": "Stem cells",
            "Enteriendocrine": "Enteroendocrine cells",
        }
        self.class_maps = {"0": celltype_map_v0}

    def _load(self, fn=None):
        """
        Read the h5ad file into self.adata and rescale the expression matrix.

        The matrix is passed through expm1 and every row is then multiplied by the "n_counts" entry of
        the corresponding cell and by 1 / 10000.

        :param fn: Path of the h5ad file. If None, the default location below self.path is used.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        self.adata.X = np.expm1(self.adata.X)
        # NOTE(review): presumably this reverts a scaling to 10,000 counts per cell - confirm.
        counts_per_cell = scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None])
        rescaled = self.adata.X.multiply(counts_per_cell)
        self.adata.X = rescaled.multiply(1 / 10000)
class Dataset(DatasetBase):
    """
    Loader for data set human_ileum_2019_10x_wang_001 (DOI 10.1084/jem.20191130).

    The constructor only records meta data; the expression matrix is read and rescaled in _load().
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            **kwargs
        )
        self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130"

        # Data source: a single processed h5ad, there is no separate meta data file.
        self.download = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad"
        self.download_meta = None

        # Study-level meta data.
        self.author = "Chen"
        self.doi = "10.1084/jem.20191130"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "ileum"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # Columns of .var and .obs that carry gene symbols and the original cell type labels.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Original cell type label -> harmonised label, stored under map version "0".
        celltype_map_v0 = {
            "Progenitor": "Progenitors",
            "Goblet": "Goblet cells",
            "Enterocyte": "Enterocytes",
            "Paneth-like": "Paneth cells",
            "Stem Cell": "Stem Cell",
            "TA": "TA",
            "Enteriendocrine": "Enteroendocrine cells",
        }
        self.class_maps = {"0": celltype_map_v0}

    def _load(self, fn=None):
        """
        Read the h5ad file into self.adata and rescale the expression matrix.

        The matrix is passed through expm1 and every row is then multiplied by the "n_counts" entry of
        the corresponding cell and by 1 / 10000.

        :param fn: Path of the h5ad file. If None, the default location below self.path is used.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        self.adata.X = np.expm1(self.adata.X)
        # NOTE(review): presumably this reverts a scaling to 10,000 counts per cell - confirm.
        counts_per_cell = scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None])
        rescaled = self.adata.X.multiply(counts_per_cell)
        self.adata.X = rescaled.multiply(1 / 10000)
class Dataset(DatasetBase):
    """
    Loader for data set human_rectum_2019_10x_wang_001 (DOI 10.1084/jem.20191130).

    The constructor only records meta data; the expression matrix is read and rescaled in _load().
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_rectum_2019_10x_wang_001_10.1084/jem.20191130"

        self.download = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad"
        # Set explicitly, as in the colon and ileum loaders of the same study: there is no meta data file.
        self.download_meta = None

        self.author = "Chen"
        self.doi = "10.1084/jem.20191130"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "rectum"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # Columns of .var and .obs that carry gene symbols and the original cell type labels.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Original cell type label -> harmonised label, stored under map version "0".
        self.class_maps = {
            "0": {
                "Progenitor": "Enterocyte progenitor",
                "Goblet": "Goblet",
                "Enterocyte": "Enterocyte",
                "Paneth-like": "Paneth-like",
                "Stem Cell": "Stem Cell",
                "TA": "TA",
                "Enteriendocrine": "Enteroendocrine",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into self.adata and rescale the expression matrix.

        The matrix is passed through expm1 and every row is then multiplied by the "n_counts" entry of
        the corresponding cell and by 1 / 10000.

        :param fn: Path of the h5ad file. If None, the default location below self.path is used.
        """
        if fn is None:
            fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad")
        self.adata = anndata.read(fn)
        self.adata.X = np.expm1(self.adata.X)
        # NOTE(review): presumably this reverts a scaling to 10,000 counts per cell - confirm.
        self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\
            .multiply(1 / 10000)
class Dataset(DatasetBase):
    """
    Loader for data set human_lung_2020_10x_lukassen_001 (DOI 10.1101/2020.03.13.991455).

    The constructor only records meta data; the expression matrix is read and rescaled in _load().
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            **kwargs
        )
        self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455"

        # Data source: a single processed h5ad, there is no separate meta data file.
        self.download = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad"
        self.download_meta = None

        # Study-level meta data.
        self.author = "Eils"
        self.doi = "10.1101/2020.03.13.991455"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "lung"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2020

        # Columns of .var and .obs that carry gene symbols and the original cell type labels.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Original cell type label -> harmonised label, stored under map version "0".
        celltype_map_v0 = {
            "Ciliated": "Multiciliated lineage",
            "Endothelial": "1_Endothelial",
            "AT2": "AT2",
            "LymphaticEndothelium": "Lymphatic EC",
            "Fibroblasts": "2_Fibroblast lineage",
            "Club": "Secretory",
            "Immuno_TCells": "T cell lineage",
            "Immuno_Monocytes": "Monocytes",
            "AT1": "AT1"
        }
        self.class_maps = {"0": celltype_map_v0}

    def _load(self, fn=None):
        """
        Read the h5ad file into self.adata and rescale the expression matrix.

        The matrix is passed through expm1 and every row is then multiplied by the "nCount_RNA" entry of
        the corresponding cell and by 1 / 10000.

        :param fn: Path of the h5ad file. If None, the default location below self.path is used.
        """
        h5ad_path = fn if fn is not None else os.path.join(
            self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad"
        )
        self.adata = anndata.read(h5ad_path)
        self.adata.X = np.expm1(self.adata.X)
        # NOTE(review): presumably this reverts a scaling to 10,000 counts per cell - confirm.
        counts_per_cell = scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None])
        rescaled = self.adata.X.multiply(counts_per_cell)
        self.adata.X = rescaled.multiply(1 / 10000)

        self.set_unkown_class_id(ids=["1_Unicorns and artifacts"])
+ self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ + .multiply(1 / 10000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py new file mode 100644 index 000000000..0a07210ec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py @@ -0,0 +1,56 @@ +import anndata +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1101_661728(DatasetBase): + """ + This is a dataloader template for tabula muris data. + """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.source = source + if self.source == "aws": + self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + + self.obs_key_cellontology_original = "free_annotation" + self.obs_key_age = "age" + self.obs_key_dev_stage = "development_stage" # not given in all data sets + self.obs_key_sex = "sex" + # ToDo: further anatomical information for subtissue in "subtissue" + + self.author = "Quake" + self.doi = "10.1101/661728" + self.healthy = True + self.normalization = "norm" + self.organism = "mouse" + 
class Dataset(Dataset_d10_1101_661728):
    """
    Loader for the droplet (10x) adipose data set of DOI 10.1101/661728.

    Meta data shared by all data sets of this study and the reading of the h5ad file are handled by the
    base class; this class only adds the data set ID, the organ and the file locations.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            source: str = "aws",
            **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs)
        # Was "mouse_adipose_2019_smartseq2_pisco_004_...", the ID of a different loader. The protocol
        # below is derived from this ID, so the old value also labelled the droplet data as smartseq2.
        self.id = "mouse_adipose_2019_10x_pisco_001_10.1101/661728"
        self.organ = "adipose"
        self.protocol = self._get_protocol_tms(self.id)

        self.class_maps = {
            "0": {},
        }

    def _load(self, fn=None):
        """
        Resolve the h5ad file of this data set and hand it to _load_generalized().

        :param fn: Path of the h5ad file. If None, the file name is chosen based on self.source and
            looked up below self.path.
        :raises ValueError: If neither fn nor self.path is set, or if self.source is not recognized.
        """
        if fn is None:
            if self.path is None:
                raise ValueError("provide either fn in load or path in constructor")
            if self.source == "aws":
                fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad")
            elif self.source == "figshare":
                fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad")
            else:
                raise ValueError("source %s not recognized" % self.source)
        self._load_generalized(fn=fn)
class Dataset(Dataset_d10_1101_661728):
    """
    Loader for data set mouse_adipose_2019_smartseq2_pisco_001 (DOI 10.1101/661728).

    Meta data shared by all data sets of this study and the reading of the h5ad file are handled by the
    base class; this class only adds the data set ID, the organ and the file locations.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            source: str = "aws",
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            source=source,
            **kwargs
        )
        self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728"
        self.organ = "adipose"
        # The protocol is derived from the data set ID by the base class helper.
        self.protocol = self._get_protocol_tms(self.id)

        self.class_maps = {"0": {}}

    def _load(self, fn=None):
        """
        Resolve the h5ad file of this data set and hand it to _load_generalized().

        :param fn: Path of the h5ad file. If None, the file name is chosen based on self.source and
            looked up below self.path.
        :raises ValueError: If neither fn nor self.path is set, or if self.source is not recognized.
        """
        if fn is not None:
            self._load_generalized(fn=fn)
            return
        if self.path is None:
            raise ValueError("provide either fn in load or path in constructor")
        # File name of this data set for each supported download source.
        file_by_source = {
            "aws": "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad",
            "figshare": "bat_facs.h5ad",
        }
        if self.source not in file_by_source:
            raise ValueError("source %s not recognized" % self.source)
        h5ad_path = os.path.join(self.path, "mouse", "adipose", file_by_source[self.source])
        self._load_generalized(fn=h5ad_path)
Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py new file mode 100644 index 000000000..755c34976 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn 
class Dataset(Dataset_d10_1101_661728):
    """
    Loader for data set mouse_adipose_2019_smartseq2_pisco_004 (DOI 10.1101/661728).

    Meta data shared by all data sets of this study and the reading of the h5ad file are handled by the
    base class; this class only adds the data set ID, the organ and the file locations.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            source: str = "aws",
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            source=source,
            **kwargs
        )
        self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728"
        self.organ = "adipose"
        # The protocol is derived from the data set ID by the base class helper.
        self.protocol = self._get_protocol_tms(self.id)

        self.class_maps = {"0": {}}

    def _load(self, fn=None):
        """
        Resolve the h5ad file of this data set and hand it to _load_generalized().

        :param fn: Path of the h5ad file. If None, the file name is chosen based on self.source and
            looked up below self.path.
        :raises ValueError: If neither fn nor self.path is set, or if self.source is not recognized.
        """
        if fn is not None:
            self._load_generalized(fn=fn)
            return
        if self.path is None:
            raise ValueError("provide either fn in load or path in constructor")
        # File name of this data set for each supported download source.
        file_by_source = {
            "aws": "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad",
            "figshare": "scat_facs.h5ad",
        }
        if self.source not in file_by_source:
            raise ValueError("source %s not recognized" % self.source)
        h5ad_path = os.path.join(self.path, "mouse", "adipose", file_by_source[self.source])
        self._load_generalized(fn=h5ad_path)
class Dataset(Dataset_d10_1101_661728):
    """
    Loader for data set mouse_bladder_2019_10x_pisco_001 (DOI 10.1101/661728).

    Meta data shared by all data sets of this study and the reading of the h5ad file are handled by the
    base class; this class only adds the data set ID, the organ and the file locations.
    """

    def __init__(
            self,
            path: Union[str, None] = None,
            meta_path: Union[str, None] = None,
            cache_path: Union[str, None] = None,
            source: str = "aws",
            **kwargs
    ):
        super().__init__(
            path=path,
            meta_path=meta_path,
            cache_path=cache_path,
            source=source,
            **kwargs
        )
        self.id = "mouse_bladder_2019_10x_pisco_001_10.1101/661728"
        self.organ = "bladder"
        # The protocol is derived from the data set ID by the base class helper.
        self.protocol = self._get_protocol_tms(self.id)

        self.class_maps = {"0": {}}

    def _load(self, fn=None):
        """
        Resolve the h5ad file of this data set and hand it to _load_generalized().

        :param fn: Path of the h5ad file. If None, the file name is chosen based on self.source and
            looked up below self.path.
        :raises ValueError: If neither fn nor self.path is set, or if self.source is not recognized.
        """
        if fn is not None:
            self._load_generalized(fn=fn)
            return
        if self.path is None:
            raise ValueError("provide either fn in load or path in constructor")
        # File name of this data set for each supported download source.
        file_by_source = {
            "aws": "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad",
            "figshare": "Bladder_droplet.h5ad",
        }
        if self.source not in file_by_source:
            raise ValueError("source %s not recognized" % self.source)
        h5ad_path = os.path.join(self.path, "mouse", "bladder", file_by_source[self.source])
        self._load_generalized(fn=h5ad_path)
"mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "bladder" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py new file mode 100644 index 000000000..2a919d354 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" + self.organ = "bone" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") + else: + raise 
ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..bc06538cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "bone" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["nan-marrow-needs-subclustering"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..68b1f2d51 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py @@ 
-0,0 +1,37 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "brain" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + + self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py new file mode 100644 index 000000000..d23cb6411 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py @@ -0,0 +1,37 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728" + 
self.organ = "brain" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + + self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py new file mode 100644 index 000000000..239a8eb7e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py @@ -0,0 +1,34 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" + self.organ = "colon" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") + else: + raise ValueError("source %s not 
recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..1d3a206e4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py @@ -0,0 +1,34 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "colon" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..d1c15fa22 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + 
+class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "diaphragm" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py new file mode 100644 index 000000000..995b2f06f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_10x_pisco_001_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + 
raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a59e8ed75 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py 
b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py new file mode 100644 index 000000000..a37a09f12 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_smartseq2_pisco_002_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") + elif self.source == "figshare": + raise ValueError("not defined") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py new file mode 100644 index 000000000..98d48650e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py @@ -0,0 +1,39 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" + self.organ = "kidney" + + self.class_maps = { + "0": { + "kidney capillary endothelial cell": "endothelial cell", + "kidney mesangial cell": "mesangial cell", + "kidney interstitial fibroblast": "interstitial fibroblast", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["kidney cell"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..3016fedea --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py @@ -0,0 +1,39 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "kidney" + + self.class_maps = { + "0": { + "kidney capillary endothelial cell": "endothelial cell", + "kidney mesangial cell": "mesangial cell", + "kidney interstitial fibroblast": "interstitial fibroblast", + }, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["kidney cell"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py new file mode 100644 index 000000000..56954c663 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_liver_2019_10x_pisco_001_10.1101/661728" + self.organ = "liver" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a21bd1c7b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "liver" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py new file mode 100644 index 000000000..05602ebf8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: 
Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" + self.organ = "lung" + + self.class_maps = { + "0": { + "ciliated columnar cell of tracheobronchial tree": "ciliated cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..615240b9b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "lung" + + self.class_maps = { + "0": { + "ciliated columnar cell of tracheobronchial tree": "ciliated cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = 
os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py new file mode 100644 index 000000000..2a65a44be --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" + self.organ = "mammarygland" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py new 
file mode 100644 index 000000000..1a3fae56e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "mammarygland" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py new file mode 100644 index 000000000..4fb82822e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" + self.organ = "muscle" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..6fb28fa22 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "muscle" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") + elif self.source == 
"figshare": + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py new file mode 100644 index 000000000..31e270b8b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "pancreatic ductal cel": "pancreatic ductal cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..aa9765a6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -0,0 
+1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "pancreatic ductal cel": "pancreatic ductal cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py new file mode 100644 index 000000000..5085b14b8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py @@ -0,0 +1,33 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" + self.organ = "skin" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a40b384e9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py @@ -0,0 +1,34 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "skin" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py 
b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py new file mode 100644 index 000000000..bfd7a079f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_spleen_2019_10x_pisco_001_10.1101/661728" + self.organ = "spleen" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-droplet-processed-official-annotations-Spleen.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..038eb521e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py @@ -0,0 +1,33 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): +
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "spleen" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py new file mode 100644 index 000000000..80b21b384 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_thymus_2019_10x_pisco_001_10.1101/661728" + self.organ = "thymus" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, 
"mouse", "thymus", "Thymus_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..2de3dc695 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "thymus" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py new file mode 100644 index 000000000..877168d52 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import 
Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_tongue_2019_10x_pisco_001_10.1101/661728" + self.organ = "tongue" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..c76a73108 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "tongue" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if 
fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py new file mode 100644 index 000000000..3f5210e77 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import numpy as np +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_trachea_2019_10x_pisco_001_10.1101/661728" + self.organ = "trachea" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..9d3dd4fcd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import numpy as np +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "trachea" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py new file mode 100644 index 000000000..51d8cc958 --- 
/dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -0,0 +1,94 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" + + self.download = [ + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fbarcodes%2Etsv%2Egz" + ] + self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" + + self.author = "Kropski" + self.doi = "10.1101/753806" + self.normalization = "raw" + self.organ = "lung" # ToDo: "parenchyma" + self.organism = "human" + self.protocol = "10x" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "celltype" + self.obs_key_state_exact = "Diagnosis" + self.obs_key_healthy = "Status" + self.healthy_state_healthy = "Control" + + self.class_maps = { + "0": { + "Proliferating Macrophages": "Macrophages", + "Myofibroblasts": "Myofibroblasts", + "Proliferating Epithelial Cells": "Proliferating Epithelial Cells", + "Mesothelial Cells": "Mesothelium", + "cDCs": "Dendritic cells", + "Mast Cells": "Mast cells", + "Ciliated": "Multiciliated lineage", + "T Cells": "T cell lineage", + "pDCs": "Dendritic cells", + "Smooth Muscle Cells": "2_Smooth Muscle", + "Transitional AT2": "AT2", + "AT2": "AT2", + "B Cells": "B cell lineage", + "NK Cells": "Innate lymphoid cells", + "Monocytes": "Monocytes", + 
"Basal": "Basal", + "Plasma Cells": "B cell lineage", + "Differentiating Ciliated": "Multiciliated lineage", + "Macrophages": "Macrophages", + "MUC5B+": "Secretory", + "SCGB3A2+": "Secretory", + "Fibroblasts": "Fibroblasts", + "Lymphatic Endothelial Cells": "Lymphatic EC", + "Endothelial Cells": "2_Blood vessels", + "SCGB3A2+ SCGB1A1+": "Secretory", + "PLIN2+ Fibroblasts": "Fibroblasts", + "KRT5-/KRT17+": "KRT5-/KRT17+", + "MUC5AC+ High": "Secretory", + "Proliferating T Cells": "T cell lineage", + "AT1": "AT1", + "HAS1 High Fibroblasts": "Fibroblasts" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), + ] + self.adata = anndata.read_mtx(fn[0]).T + self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) + self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=["barcodes"]) + obs = pd.read_csv(fn[3], index_col=0) + self.adata = self.adata[obs.index.tolist(), :].copy() + self.adata.obs = obs + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py new file mode 100644 index 000000000..0e5dd65f6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ 
-0,0 +1,128 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" + + self.download = [ + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" + ] + self.download_meta = None + + self.author = "Clatworthy" + self.doi = "10.1126/science.aat5031" + self.healthy = True + self.normalization = "norm" + self.organ = "kidney" # ToDo: "renal medulla, renal pelvis, ureter, cortex of kidney" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "ID" + + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Ascending vasa recta endothelium": "Endothelial Cells - AVR", + "B cell": "B cell", + "CD4 T cell": "CD4 T cell", + "CD8 T cell": "CD8 T cell", + "CNT/PC - proximal UB": "CNT/PC - proximal UB", + "Cap mesenchyme": "Cap mesenchyme", + "Connecting tubule": "Connecting tubule", + "Descending vasa recta endothelium": "Endothelial Cells - AEA & DVR", + "Distal S shaped body": "Distal S shaped body", + "Distal renal vesicle": "Distal renal vesicle", + "Distinct proximal tubule 1": "Distinct proximal tubule 1", + "Distinct proximal tubule 2": "Distinct proximal tubule 2", + "Endothelium": "Endothelial Cells (unassigned)", + "Epithelial progenitor cell": "Epithelial progenitor", + "Erythroid": "Erythroid", + "Fibroblast": "Fibroblast", + "Fibroblast 1": "Fibroblast", + "Fibroblast 2": "Fibroblast", + "Glomerular endothelium": "Endothelial Cells - glomerular 
capillaries", + "Indistinct intercalated cell": "Indistinct intercalated cell", + "Innate like lymphocyte": "Innate like lymphocyte", + "Loop of Henle": "Loop of Henle", + "MNP-a/classical monocyte derived": "MNP-a/classical monocyte derived", + "MNP-b/non-classical monocyte derived": "MNP-b/non-classical monocyte derived", + "MNP-c/dendritic cell": "MNP-c/dendritic cell", + "MNP-d/Tissue macrophage": "MNP-d/Tissue macrophage", + "Macrophage 1": "Macrophage", + "Macrophage 2": "Macrophage", + "Mast cell": "Mast cell", + "Mast cells": "Mast cell", + "Medial S shaped body": "Medial S shaped body", + "Megakaryocyte": "Megakaryocyte", + "Monocyte": "Monocyte", + "Myofibroblast": "Myofibroblast", + "Myofibroblast 1": "Myofibroblast", + "Myofibroblast 2": "Myofibroblast", + "NK cell": "NK cell", + "NKT cell": "NKT cell", + "Neuron": "Neuron", + "Neutrophil": "Neutrophil", + "Pelvic epithelium": "Pelvic epithelium", + "Pelvic epithelium - distal UB": "Pelvic epithelium - distal UB", + "Peritubular capillary endothelium 1": "Peritubular capillary endothelium 1", + "Peritubular capillary endothelium 2": "Peritubular capillary endothelium 2", + "Plasmacytoid dendritic cell": "Plasmacytoid dendritic cell", + "Podocyte": "Podocyte", + "Principal cell": "Principal cell", + "Proliferating B cell": "Proliferating B cell", + "Proliferating NK cell": "Proliferating NK cell", + "Proliferating Proximal Tubule": "Proliferating Proximal Tubule", + "Proliferating cDC2": "Proliferating cDC2", + "Proliferating cap mesenchyme": "Proliferating cap mesenchyme", + "Proliferating distal renal vesicle": "Proliferating distal renal vesicle", + "Proliferating fibroblast": "Proliferating fibroblast", + "Proliferating macrophage": "Proliferating macrophage", + "Proliferating monocyte": "Proliferating monocyte", + "Proliferating myofibroblast": "Proliferating myofibroblast", + "Proliferating stroma progenitor": "Proliferating stroma progenitor", + "Proximal S shaped body": "Proximal S shaped body", 
+ "Proximal UB": "Proximal UB", + "Proximal renal vesicle": "Proximal renal vesicle", + "Proximal tubule": "Proximal tubule", + "Stroma progenitor": "Stroma progenitor", + "Thick ascending limb of Loop of Henle": "Thick ascending limb of Loop of Henle", + "Transitional urothelium": "Transitional urothelium", + "Type A intercalated cell": "Type A intercalated cell", + "Type B intercalated cell": "Collecting Duct - Intercalated Cells Type B", + "cDC1": "cDC1", + "cDC2": "cDC2", + "pDC": "pDC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), + os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") + ] + adult = anndata.read(fn[0]) + fetal = anndata.read(fn[1]) + adult.obs["development"] = "adult" + fetal.obs["development"] = "fetal" + self.adata = adult.concatenate(fetal) + self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py new file mode 100644 index 000000000..5f9715507 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -0,0 +1,91 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" + + self.download = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1126/science.aay3224" + self.healthy = True + self.normalization = "norm" + self.organ = "thymus" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Anno_level_fig1" + + self.class_maps = { + "0": { + "B_memory": "B_memory", + "B_naive": "B_naive", + "B_plasma": "B_plasma", + "B_pro/pre": "B_pro/pre", + "CD4+T": "CD4+T", + "CD4+Tmem": "CD4+Tmem", + "CD8+T": "CD8+T", + "CD8+Tmem": "CD8+Tmem", + "CD8αα": "CD8αα", + "DC1": "DC1", + "DC2": "DC2", + "DN": "DN", + "DP": "DP", + "ETP": "ETP", + "Endo": "Endo", + "Epi_GCM2": "Epi_GCM2", + "Ery": "Ery", + "Fb_1": "Fb_1", + "Fb_2": "Fb_2", + "Fb_cycling": "Fb_cycling", + "ILC3": "ILC3", + "Lymph": "Lymph", + "Mac": "Mac", + "Mast": "Mast", + "Mgk": "Mgk", + "Mono": "Mono", + "NK": "NK", + "NKT": "NKT", + "NMP": "NMP", + "T(agonist)": "T(agonist)", + "TEC(myo)": "TEC(myo)", + "TEC(neuro)": "TEC(neuro)", + "Treg": "Treg", + "VSMC": "VSMC", + "aDC": "aDC", + "cTEC": "cTEC", + "mTEC(I)": "mTEC(I)", + "mTEC(II)": "mTEC(II)", + "mTEC(III)": "mTEC(III)", + "mTEC(IV)": "mTEC(IV)", + "mcTEC": "mcTEC", + "pDC": "pDC", + "αβT(entry)": "alpha_beta_T(entry)", + "γδT": "gamma_delta_T", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py @@ -0,0 +1 @@ +FILE_PATH = 
__file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py new file mode 100644 index 000000000..825c386f4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py @@ -0,0 +1,68 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" + + self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" + # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.download_meta = None + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.organ = "esophagus" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids-HCATisStab7413619" + + self.obs_key_cellontology_original = "Celltypes" + + self.class_maps = { + "0": { + "B_CD27neg": "B_CD27neg", + "B_CD27pos": "B_CD27pos", + "Blood_vessel": "Blood_vessel", + "Dendritic_Cells": "Dendritic cell", + "Epi_basal": "Basal cell", + "Epi_dividing": "Epi_dividing", + "Epi_stratified": "Stratified epithelial cell", + "Epi_suprabasal": "Epi_suprabasal", + "Epi_upper": "Epi_upper", + "Glands_duct": "Glands_duct", + "Glands_mucous": "Glands_mucous", + "Lymph_vessel": "Lymph_vessel", + "Mast_cell": "Mast cell", +
"Mono_macro": "Mono_macro", + "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", + "Stroma": "Stromal cell", + "T_CD4": "T_CD4", + "T_CD8": "T_CD8", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") + self.adata = anndata.read(fn) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py new file mode 100644 index 000000000..aa1aded6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py @@ -0,0 +1,73 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" + + self.download = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + self.download_meta = None + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.state_exact = "healthy" + self.organ = "lung" # ToDo: "parenchyma" + self.organism = "human" + self.protocol = "10x" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene.ids.HCATisStab7509734" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "T_CD4": "T cell lineage", + "Mast_cells": "Mast cells", + "Monocyte": "Monocytes", + "Blood_vessel": "2_Blood vessels", + "Ciliated": "Multiciliated lineage", +
"Macrophage_MARCOneg": "Macrophages", + "DC_plasmacytoid": "Dendritic cells", + "DC_1": "Dendritic cells", + "Muscle_cells": "2_Smooth Muscle", + "Macrophage_MARCOpos": "Macrophages", + "T_cells_Dividing": "T cell lineage", + "DC_Monocyte_Dividing": "Dendritic cells", + "B_cells": "B cell lineage", + "T_CD8_CytT": "T cell lineage", + "NK_Dividing": "Innate lymphoid cells", + "T_regulatory": "T cell lineage", + "DC_2": "Dendritic cells", + "Alveolar_Type2": "AT2", + "Plasma_cells": "B cell lineage", + "NK": "Innate lymphoid cells", + "Alveolar_Type1": "AT1", + "Fibroblast": "2_Fibroblast lineage", + "DC_activated": "Dendritic cells", + "Macrophage_Dividing": "Macrophages", + "Lymph_vessel": "Lymphatic EC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") + self.adata = anndata.read(fn) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py new file mode 100644 index 000000000..77325a3ca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py @@ -0,0 +1,77 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" + + self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.download_meta = None + + self.author = "Meyer" + self.doi = 
"10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.organ = "spleen" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids-HCATisStab7463846" + + self.obs_key_cellontology_original = "Celltypes" + + self.class_maps = { + "0": { + "B_Hypermutation": "B_Hypermutation", + "B_T_doublet": "B_T_doublet", + "B_follicular": "B_follicular", + "B_mantle": "B_mantle", + "CD34_progenitor": "CD34_progenitor", + "DC_1": "DC_1", + "DC_2": "DC_2", + "DC_activated": "DC_activated", + "DC_plasmacytoid": "DC_plasmacytoid", + "ILC": "ILC", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "NK_CD160pos": "NK_CD160pos", + "NK_FCGR3Apos": "NK_FCGR3Apos", + "NK_dividing": "NK_dividing", + "Plasma_IgG": "Plasma_IgG", + "Plasma_IgM": "Plasma_IgM", + "Plasmablast": "Plasmablast", + "Platelet": "Platelet", + "T_CD4_conv": "T_CD4_conv", + "T_CD4_fh": "T_CD4_fh", + "T_CD4_naive": "T_CD4_naive", + "T_CD4_reg": "T_CD4_reg", + "T_CD8_CTL": "T_CD8_CTL", + "T_CD8_MAIT": "T_CD8_MAIT", + "T_CD8_activated": "T_CD8_activated", + "T_CD8_gd": "T_CD8_gd", + "T_cell_dividing": "Proliferating T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "spleen", "spleen.cellxgene.h5ad") + self.adata = anndata.read(fn) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py 
b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py new file mode 100644 index 000000000..9527f968a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -0,0 +1,65 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" + + self.download = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" + self.download_meta = None + + self.author = "Wong" + self.doi = "10.15252/embj.2018100811" + self.healthy = True + self.normalization = "raw" + self.organ = "eye" # ToDo: "retina" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Muller cell": "Muller cell", + "amacrine cell": "Amacrine cell", + "microglial cell": "Microglia", + "retinal bipolar neuron type A": "Retinal bipolar neuron type A", + "retinal bipolar neuron type B": "Retinal bipolar neuron type B", + "retinal bipolar neuron type C": "Retinal bipolar neuron type C", + "retinal bipolar neuron type D": "Retinal bipolar neuron type D", + "retinal cone cell": "Retinal cone cell", + "retinal ganglion cell": "Retinal ganglion cell", + "retinal rod cell type A": "Retinal rod cell type A", + "retinal rod cell type B": "Retinal rod cell type B", + "retinal rod cell type C": "Retinal rod cell type C", + "unannotated": "Unknown", + "unspecified": "Unknown", + }, + } + + def _load(self, fn=None): + 
if fn is None: + fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d_nan/__init__.py b/sfaira/data/dataloaders/loaders/d_nan/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py new file mode 100644 index 000000000..d72ed32d6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py @@ -0,0 +1,48 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2018_10x_ica_001_unknown" + + self.download = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_meta = None + + self.author = "Regev" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "blood" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + self.var_ensembl_col = "Accession" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + self.adata = anndata.read_loom(fn) 
+ idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "umbilical cord blood").values, + (self.adata.obs["emptydrops_is_cell"] == "t").values) + self.adata = self.adata[idx].copy() diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py new file mode 100644 index 000000000..808914f61 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -0,0 +1,55 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader requires manual preprocessing of the raw datafile. To download the data, use the link in the + `.download_website` attribute of this class. To create the file required by this dataloader, run the following + python code: + + import scanpy + scanpy.read_10x_h5("pbmc_10k_v3_filtered_feature_bc_matrix.h5").write("pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2019_10x_10xGenomics_001_unknown" + + self.download = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.download_meta = None + + self.author = "10x Genomics" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "blood" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "blood", 
"pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py new file mode 100644 index 000000000..807392226 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py @@ -0,0 +1,48 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2018_10x_ica_unknown" + + self.download = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_meta = None + + self.author = "Regev" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "bone" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + self.var_ensembl_col = "Accession" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + self.adata = anndata.read_loom(fn) + idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "bone marrow").values, + (self.adata.obs["emptydrops_is_cell"] == "t").values) + self.adata = self.adata[idx].copy() diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py new file mode 100644 index 000000000..16f8d1733 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -0,0 +1,45 @@ +import pydoc +import os +from typing import Union +from warnings 
import warn +from sfaira.data import DatasetSuperGroup, DatasetGroupDirectoryOriented + + +class DatasetSuperGroupLoaders(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Class that sits on top of a directory of data set directories that each contain a data set group. + + :param file_base: + :param dir_prefix: Prefix to sub-select directories by. Set to "" for no constraints. + :param path: + :param meta_path: + :param cache_path: + """ + # Directory choice hyperparamters: + dir_prefix = "d" + dir_exlcude = [] + # Collect all data loaders from files in directory: + dataset_groups = [] + cwd = os.path.dirname(__file__) + for f in os.listdir(cwd): + if os.path.isdir(os.path.join(cwd, f)): # only directories + if f[:len(dir_prefix)] == dir_prefix and f not in dir_exlcude: # Narrow down to data set directories + path_dsg = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." 
+ f + ".FILE_PATH") + if path_dsg is not None: + dataset_groups.append(DatasetGroupDirectoryOriented( + file_base=path_dsg, + path=path, + meta_path=meta_path, + cache_path=cache_path + )) + else: + warn(f"DatasetGroupDirectoryOriented was None for {f}") + super().__init__(dataset_groups=dataset_groups) diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py new file mode 100644 index 000000000..2a549254b --- /dev/null +++ b/sfaira/data/dataloaders/super_group.py @@ -0,0 +1,35 @@ +from typing import Union + +from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders +from sfaira.data.dataloaders.databases import DatasetSuperGroupDatabases +from sfaira.data import DatasetSuperGroup + + +class DatasetSuperGroupSfaira(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Nested super group of data loaders, unifying data set wise data loader SuperGroup and the database + interface SuperGroup. + + :param path: + :param meta_path: + :param cache_path: + """ + super().__init__(dataset_groups=[ + DatasetSuperGroupLoaders( + path=path, + meta_path=meta_path, + cache_path=cache_path, + ), + DatasetSuperGroupDatabases( + path=path, + meta_path=meta_path, + cache_path=cache_path, + ) + ]) diff --git a/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb b/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb deleted file mode 100644 index 156861dd0..000000000 --- a/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Human Cell Landscape Preprocessing\n", - "This jupyter notebook contains the code that is required to prepare the full Human Cell Landscape single cell dataset (Han et al., 2020. 
doi: 10.1038/s41586-020-2157-4) for use through sfaira dataloaders. The code downloads, annotates and cleans the provided adata pbject, saves it by sample and copies it into the right folders in your local sfaira dataset repository, so you can use it with sfaira dataloaders. The notebook saves all files in its working directory and requires you to provide the path to your local sfaira dataset repository in the last cell of this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/python/lib/python3.7/site-packages/anndata/_core/anndata.py:21: FutureWarning: pandas.core.index is deprecated and will be removed in a future version. The public classes are available in the top-level namespace.\n", - " from pandas.core.index import RangeIndex\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import urllib.request\n", - "import numpy as np\n", - "import anndata as ad\n", - "import scipy.sparse\n", - "import os\n", - "import zipfile\n", - "from sfaira.versions.genome_versions.class_interface import SuperGenomeContainer" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('HCL_Fig1_adata.h5ad', )\n", - "('HCL_Fig1_cell_Info.xlsx', )\n", - "('annotation_rmbatch_data_revised417.zip', )\n" - ] - } - ], - "source": [ - "# download required files from human cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/17727365', 'HCL_Fig1_adata.h5ad'))\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/21758835', 'HCL_Fig1_cell_Info.xlsx'))\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/22447898', 'annotation_rmbatch_data_revised417.zip'))" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# extract the downloaded zip archive\n", - "with zipfile.ZipFile('annotation_rmbatch_data_revised417.zip', 'r') as zip_ref:\n", - " zip_ref.extractall('./')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# load data file\n", - "adata = ad.read('HCL_Fig1_adata.h5ad')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "adrenalgland 43476\n", - "stomach 41963\n", - "kidney 40691\n", - "blood 35533\n", - "lung 33698\n", - "brain 30493\n", - "liver 28501\n", - "pancreas 28473\n", - "colon 22301\n", - "pleura 19695\n", - "spleen 15806\n", - "malegonad 13211\n", - "omentum 12812\n", - "thyroid 12647\n", - "esophagus 11364\n", - "heart 10783\n", - "trachea 9949\n", - "chorionicvillus 9898\n", - "gallbladder 9769\n", - "artery 9652\n", - "placenta 9595\n", - "bladder 9048\n", - "bone 8704\n", - "cervix 8096\n", - "muscle 7775\n", - "uterus 7694\n", - "skin 6991\n", - "femalegonad 6941\n", - "fallopiantube 6556\n", - "rib 5992\n", - "spinalcord 5916\n", - "rectum 5718\n", - "jejunum 5549\n", - "calvaria 5129\n", - "duodenum 4681\n", - "thymus 4516\n", - "epityphlon 4486\n", - "ileum 3367\n", - "prostate 2445\n", - "ureter 2390\n", - "eye 1880\n", - "hesc 1660\n", - "adipose 1372\n", - "Name: organ, dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# convert to sparse matrix\n", - "adata.X = scipy.sparse.csr_matrix(adata.X).copy()\n", - "\n", - "# harmonise annotations\n", - "for col in ['batch', 'tissue']:\n", - " adata.obs[col] = adata.obs[col].astype('str')\n", - "adata.obs.index = adata.obs.index.str.replace('AdultJeJunum', 'AdultJejunum', regex=True).str.replace('AdultGallBladder', 'AdultGallbladder', regex=True).str.replace('FetalFemaleGonald', 'FetalFemaleGonad', regex=True)\n", - 
"adata.obs.replace({'AdultJeJunum': 'AdultJejunum', 'AdultGallBladder': 'AdultGallbladder', 'FetalFemaleGonald': 'FetalFemaleGonad'}, regex=True, inplace=True)\n", - "adata.obs.index = [\"-\".join(i.split('-')[:-1]) for i in adata.obs.index]\n", - "\n", - "# load celltype labels and harmonise them\n", - "fig1_anno = pd.read_excel('HCL_Fig1_cell_Info.xlsx', index_col='cellnames')\n", - "fig1_anno.index = fig1_anno.index.str.replace('AdultJeJunum', 'AdultJejunum', regex=True).str.replace('AdultGallBladder', 'AdultGallbladder', regex=True).str.replace('FetalFemaleGonald', 'FetalFemaleGonad', regex=True)\n", - "\n", - "# check that the order of cells and cell labels is the same\n", - "assert np.all(fig1_anno.index == adata.obs.index)\n", - "\n", - "# add annotations to adata object and rename columns\n", - "adata.obs = pd.concat([adata.obs, fig1_anno[['cluster', 'stage', 'donor', 'celltype']]], axis=1)\n", - "adata.obs.columns = ['sample', 'tissue', 'n_genes', 'n_counts', 'cluster_global', 'stage', 'donor', 'celltype_global']\n", - "\n", - "# add sample-wise annotations to the full adata object\n", - "df = pd.DataFrame(columns=['Cell_barcode', 'Sample', 'Batch', 'Cell_id', 'Cluster_id', 'Ages', 'Development_stage', 'Method', 'Gender', 'Source', 'Biomaterial', 'Name', 'ident', 'Celltype'])\n", - "for f in os.listdir('annotation_rmbatch_data_revised417/'):\n", - " df1 = pd.read_csv('annotation_rmbatch_data_revised417/'+f, encoding='unicode_escape')\n", - " df = pd.concat([df, df1], sort=True)\n", - "df = df.set_index('Cell_id')\n", - "adata = adata[[i in df.index for i in adata.obs.index]].copy()\n", - "a_idx = adata.obs.index.copy()\n", - "adata.obs = pd.concat([adata.obs, df[['Ages', 'Celltype', 'Cluster_id', 'Gender', 'Method', 'Source']]], axis=1)\n", - "assert np.all(a_idx == adata.obs.index)\n", - "\n", - "# remove mouse cells from the object\n", - "adata = adata[adata.obs['Source'] != 'MCA2.0'].copy()\n", - "\n", - "# tidy up the column names of the obs 
annotations\n", - "adata.obs.columns = ['sample', 'sub_tissue', 'n_genes', 'n_counts', 'cluster_global', 'dev_stage',\n", - " 'donor', 'celltype_global', 'age', 'celltype_specific', 'cluster_specific', 'gender',\n", - " 'protocol', 'source']\n", - "\n", - "# create some annotations that are used in sfaira\n", - "adata.obs[\"healthy\"] = True\n", - "adata.obs[\"state_exact\"] = 'healthy'\n", - "adata.obs[\"cell_ontology_class\"] = adata.obs[\"celltype_global\"]\n", - "adata.obs[\"cell_ontology_id\"] = None\n", - "\n", - "# convert gene ids to ensembl ids and store both\n", - "gc = SuperGenomeContainer(species='human', genome='Homo_sapiens_GRCh38_97')\n", - "id_dict = gc.names_to_id_dict\n", - "adata.var = adata.var.reset_index().rename({'index': 'names'}, axis='columns')\n", - "adata.var['ensembl'] = [id_dict[n] if n in id_dict.keys() else 'n/a' for n in adata.var['names']]\n", - "adata.var.index = adata.var['ensembl'].values\n", - "\n", - "# create a tidy organ annotaion which is then used in sfaira\n", - "adata.obs['organ'] = adata.obs['sub_tissue'] \\\n", - " .str.replace(\"Adult\", \"\") \\\n", - " .str.replace(\"Fetal\", \"\") \\\n", - " .str.replace(\"Neonatal\", \"\") \\\n", - " .str.replace(\"Transverse\", \"\") \\\n", - " .str.replace(\"Sigmoid\", \"\") \\\n", - " .str.replace(\"Ascending\", \"\") \\\n", - " .str.replace(\"Cord\", \"\") \\\n", - " .str.replace(\"Peripheral\", \"\") \\\n", - " .str.replace(\"CD34P\", \"\") \\\n", - " .str.replace(\"Cerebellum\", \"Brain\") \\\n", - " .str.replace(\"TemporalLobe\", \"Brain\") \\\n", - " .str.replace(\"BoneMarrow\", \"Bone\") \\\n", - " .str.replace(\"Spinal\", \"SpinalCord\") \\\n", - " .str.replace(\"Intestine\", \"Stomach\") \\\n", - " .str.replace(\"Eyes\", \"Eye\") \\\n", - " .str.lower()\n", - "\n", - "# print the number of cells per organ\n", - "adata.obs['organ'].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "... storing 'sample' as categorical\n", - "... storing 'sub_tissue' as categorical\n", - "... storing 'dev_stage' as categorical\n", - "... storing 'donor' as categorical\n", - "... storing 'celltype_global' as categorical\n", - "... storing 'age' as categorical\n", - "... storing 'celltype_specific' as categorical\n", - "... storing 'cluster_specific' as categorical\n", - "... storing 'gender' as categorical\n", - "... storing 'protocol' as categorical\n", - "... storing 'source' as categorical\n", - "... storing 'state_exact' as categorical\n", - "... storing 'cell_ontology_class' as categorical\n", - "... storing 'cell_ontology_id' as categorical\n", - "... storing 'organ' as categorical\n", - "... storing 'ensembl' as categorical\n" - ] - } - ], - "source": [ - "# write full adata object to disk\n", - "adata.write('HCL_processed.h5ad')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n" - ] - } - ], - "source": [ - "# write separate files per sample as used in sfaira\n", - "os.mkdir('hcl_organs/')\n", - "for i in adata.obs['sample'].unique():\n", - " a = adata[adata.obs['sample'] == i].copy()\n", - " a.write('hcl_organs/hcl_{}.h5ad'.format(i))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# copy the seperate h5ad files into your sfaira data repository\n", - "your_datarepository = '/path/to/repository' # path to the folder that contains the 'human' and 'mouse' directories\n", - "for samplefile in os.listdir('hcl_organs/'):\n", - " if samplefile.startswith('hcl_'):\n", - " a = ad.read('hcl_organs/'+samplefile)\n", - " organ = a.obs['organ'][0]\n", - " if organ not in os.listdir(f\"{your_datarepository}/human\"):\n", - " os.mkdir(f\"{your_datarepository}/human/{organ}\")\n", - " a.write(f'{your_datarepository}/human/{organ}/{samplefile}')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/sfaira/data/human/__init__.py b/sfaira/data/human/__init__.py deleted file mode 100644 index d986a9781..000000000 --- a/sfaira/data/human/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -from .adipose import 
DatasetGroupAdipose -from .adrenalgland import DatasetGroupAdrenalgland -from .mixed import DatasetGroupMixed -from .artery import DatasetGroupArtery -from .bladder import DatasetGroupBladder -from .blood import DatasetGroupBlood -from .bone import DatasetGroupBone -from .brain import DatasetGroupBrain -from .calvaria import DatasetGroupCalvaria -from .cervix import DatasetGroupCervix -from .chorionicvillus import DatasetGroupChorionicvillus -from .colon import DatasetGroupColon -from .duodenum import DatasetGroupDuodenum -from .epityphlon import DatasetGroupEpityphlon -from .esophagus import DatasetGroupEsophagus -from .eye import DatasetGroupEye -from .fallopiantube import DatasetGroupFallopiantube -from .femalegonad import DatasetGroupFemalegonad -from .gallbladder import DatasetGroupGallbladder -from .heart import DatasetGroupHeart -from .hesc import DatasetGroupHesc -from .ileum import DatasetGroupIleum -from .jejunum import DatasetGroupJejunum -from .kidney import DatasetGroupKidney -from .liver import DatasetGroupLiver -from .lung import DatasetGroupLung -from .malegonad import DatasetGroupMalegonad -from .muscle import DatasetGroupMuscle -from .omentum import DatasetGroupOmentum -from .pancreas import DatasetGroupPancreas -from .placenta import DatasetGroupPlacenta -from .pleura import DatasetGroupPleura -from .prostate import DatasetGroupProstate -from .rectum import DatasetGroupRectum -from .rib import DatasetGroupRib -from .skin import DatasetGroupSkin -from .spinalcord import DatasetGroupSpinalcord -from .spleen import DatasetGroupSpleen -from .stomach import DatasetGroupStomach -from .thymus import DatasetGroupThymus -from .thyroid import DatasetGroupThyroid -from .trachea import DatasetGroupTrachea -from .ureter import DatasetGroupUreter -from .uterus import DatasetGroupUterus diff --git a/sfaira/data/human/adipose/__init__.py b/sfaira/data/human/adipose/__init__.py deleted file mode 100644 index 93e95d11c..000000000 --- 
a/sfaira/data/human/adipose/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_adipose import DatasetGroupAdipose diff --git a/sfaira/data/human/adipose/external.py b/sfaira/data/human/adipose/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/adipose/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adipose/human_adipose.py b/sfaira/data/human/adipose/human_adipose.py deleted file mode 100644 index ca12c0d40..000000000 --- a/sfaira/data/human/adipose/human_adipose.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_adipose_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupAdipose(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py deleted file mode 100644 index d72bb0627..000000000 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'adipose' - self.sub_tissue = 'AdultAdipose' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adipose", "hcl_AdultAdipose_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/adrenalgland/__init__.py b/sfaira/data/human/adrenalgland/__init__.py deleted file mode 100644 index 4cfcfad3a..000000000 --- a/sfaira/data/human/adrenalgland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_adrenalgland import DatasetGroupAdrenalgland diff --git a/sfaira/data/human/adrenalgland/external.py b/sfaira/data/human/adrenalgland/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/adrenalgland/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland.py b/sfaira/data/human/adrenalgland/human_adrenalgland.py deleted file mode 100644 index af942823d..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_adrenalgland_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_adrenalgland_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_adrenalgland_2020_microwell_han_003 import Dataset as Dataset0003 -from .human_adrenalgland_2020_microwell_han_004 import Dataset as Dataset0004 -from .human_adrenalgland_2020_microwell_han_005 import Dataset as Dataset0005 -from .human_adrenalgland_2020_microwell_han_006 import Dataset as Dataset0006 - - -class DatasetGroupAdrenalgland(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if 
available: - try: - from sfaira_extension.data.human import DatasetGroupAdrenalgland - self.datasets.update(DatasetGroupAdrenalgland(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py deleted file mode 100644 index e9db32eb1..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'NeonatalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_NeonatalAdrenalGland_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py deleted file mode 100644 index ec1f52bdc..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py deleted file mode 100644 index 0deb5571f..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py deleted file mode 100644 index 310c19e76..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'AdultAdrenalGland' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py deleted file mode 100644 index 120868718..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py deleted file mode 100644 index 437aab383..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'AdultAdrenalGland' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/artery/__init__.py b/sfaira/data/human/artery/__init__.py deleted file mode 100644 index b6d2b14a5..000000000 --- a/sfaira/data/human/artery/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_artery import DatasetGroupArtery diff --git a/sfaira/data/human/artery/external.py b/sfaira/data/human/artery/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/artery/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/artery/human_artery.py b/sfaira/data/human/artery/human_artery.py deleted file mode 100644 index 7e51a6d0c..000000000 --- a/sfaira/data/human/artery/human_artery.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_artery_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupArtery(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupArtery - self.datasets.update(DatasetGroupArtery(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py deleted file mode 100644 index 3ea6ffc35..000000000 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'artery' - self.sub_tissue = 'AdultArtery' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "artery", "hcl_AdultArtery_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/bladder/__init__.py b/sfaira/data/human/bladder/__init__.py deleted file mode 100644 index e85cb5318..000000000 --- a/sfaira/data/human/bladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_bladder import DatasetGroupBladder diff --git a/sfaira/data/human/bladder/external.py b/sfaira/data/human/bladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/bladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bladder/human_bladder.py b/sfaira/data/human/bladder/human_bladder.py deleted file mode 100644 index 6efc756b8..000000000 --- a/sfaira/data/human/bladder/human_bladder.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_bladder_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_bladder_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_bladder_2020_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupBladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py deleted file mode 100644 index 5bb5d35d8..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py 
+++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py deleted file mode 100644 index 21ba1594d..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py deleted file mode 100644 index 29312ed94..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultGallbladder_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/__init__.py b/sfaira/data/human/blood/__init__.py deleted file mode 100644 index 54879d8e2..000000000 --- a/sfaira/data/human/blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_blood import DatasetGroupBlood diff --git a/sfaira/data/human/blood/external.py b/sfaira/data/human/blood/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/blood/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/blood/human_blood.py b/sfaira/data/human/blood/human_blood.py deleted file mode 100644 index 1e439ff31..000000000 --- a/sfaira/data/human/blood/human_blood.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_blood_2018_10x_ica_001 import Dataset as Dataset0001 -from .human_blood_2019_10x_10xGenomics_001 import Dataset as Dataset0002 -from .human_blood_2020_microwell_han_001 import Dataset as Dataset0003 -from .human_blood_2020_microwell_han_002 import Dataset as Dataset0004 -from .human_blood_2020_microwell_han_003 import Dataset as Dataset0005 -from .human_blood_2020_microwell_han_004 import Dataset as Dataset0006 -from .human_blood_2020_microwell_han_005 import Dataset as Dataset0007 -from .human_blood_2020_microwell_han_006 import Dataset as Dataset0008 -from .human_blood_2020_microwell_han_007 import Dataset as Dataset0009 - - -class DatasetGroupBlood(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - 
Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py deleted file mode 100644 index 4d733a8b5..000000000 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. 
For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import numpy as np - adata = anndata.read_loom('c95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom') - idx = np.logical_and((adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, (adata.obs['emptydrops_is_cell'] == 't').values) - adata = adata[idx].copy() - adata.write('ica_bone.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2018_10x_ica_001_unknown" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_website_meta = None - self.organ = "blood" - self.sub_tissue = "umbilical_cord_blood" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, - (self.adata.obs['emptydrops_is_cell'] == 't').values) - self.adata = self.adata[idx].copy() - - else: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "ica_blood.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py deleted file mode 100644 index 72581d7ee..000000000 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ /dev/null @@ -1,65 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader requires manual preprocessing of the raw datafile. To download the data, use the link in the - `.download_website` attribute of this class. 
To create the file required by this dataloader, run the following - python code: - - import scanpy - scanpy.read_10x_h5('pbmc_10k_v3_filtered_feature_bc_matrix.h5').write('pbmc_10k_v3_filtered_feature_bc_matrix.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2019_10x_10xGenomics_001_unknown" - self.download_website = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.download_website_meta = None - self.organ = "blood" - self.sub_tissue = "pbmcs" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = '10x Genomics' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py deleted file mode 100644 index 9745f5970..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py deleted file mode 100644 index ef055d7ba..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_PeripheralBlood_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py deleted file mode 100644 index 78c553a83..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBlood' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py deleted file mode 100644 index 1e80922d4..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py deleted file mode 100644 index 0ae2a490e..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBloodCD34P' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py deleted file mode 100644 index b745cb32a..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBloodCD34P' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py deleted file mode 100644 index a96dc2fb5..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBlood' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bone/__init__.py b/sfaira/data/human/bone/__init__.py deleted file mode 100644 index bdf361783..000000000 --- a/sfaira/data/human/bone/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_bone import DatasetGroupBone diff --git a/sfaira/data/human/bone/external.py b/sfaira/data/human/bone/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/bone/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bone/human_bone.py b/sfaira/data/human/bone/human_bone.py deleted file mode 100644 index 3a8842e1f..000000000 --- a/sfaira/data/human/bone/human_bone.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_bone_2018_10x_ica_001 import Dataset as Dataset0001 -from .human_bone_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_bone_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupBone(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py deleted file mode 100644 index 85b019bbe..000000000 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import numpy as np - adata = anndata.read_loom('c95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom') - idx = np.logical_and((adata.obs['derived_organ_parts_label'] == 'bone marrow').values, (adata.obs['emptydrops_is_cell'] == 't').values) - adata = adata[idx].copy() - adata.write('ica_bone.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2018_10x_ica_unknown" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_website_meta = None - self.organ = "bone" - self.sub_tissue = "bone_marrow" - self.annotated = False - - self.class_maps = { - 
"0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'bone marrow').values, - (self.adata.obs['emptydrops_is_cell'] == 't').values) - self.adata = self.adata[idx].copy() - - else: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "ica_bone.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py deleted file mode 100644 index eace2ce68..000000000 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - 
""" - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'bone' - self.sub_tissue = 'AdultBoneMarrow' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - 
self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py deleted file mode 100644 index 3d7d21c78..000000000 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'bone' - self.sub_tissue = 'AdultBoneMarrow' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/__init__.py b/sfaira/data/human/brain/__init__.py deleted file mode 100644 index c1a0ef08b..000000000 --- a/sfaira/data/human/brain/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_brain import DatasetGroupBrain diff --git a/sfaira/data/human/brain/external.py b/sfaira/data/human/brain/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/brain/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/brain/human_brain.py b/sfaira/data/human/brain/human_brain.py deleted file mode 100644 index 016c8fce5..000000000 --- a/sfaira/data/human/brain/human_brain.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_brain_2017_DroNcSeq_habib_001 import Dataset as Dataset0001 -from .human_brain_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_brain_2020_microwell_han_002 import Dataset as Dataset0003 -from .human_brain_2020_microwell_han_003 import Dataset as Dataset0004 -from .human_brain_2020_microwell_han_004 import Dataset as Dataset0005 -from .human_brain_2020_microwell_han_005 import Dataset as Dataset0006 -from .human_brain_2020_microwell_han_006 import Dataset as Dataset0007 - - -class 
DatasetGroupBrain(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py deleted file mode 100644 index 6610531c7..000000000 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" - self.download_website = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" - self.download_website_meta = None - self.organ = "brain" - self.sub_tissue = "hippocampus, prefrontal cortex" - self.annotated = True - - self.class_maps = { - "0": { - 'exPFC1': 'Glutamatergic neurons from the PFC 1', - 'exPFC2': 'Glutamatergic neurons from the PFC 2', - 'exDG': 'Granule neurons from the hip dentate gyrus region', - 'GABA1': 'GABAergic interneurons 1', - 'GABA2': 'GABAergic interneurons 2', - 'exCA3': 'Pyramidal neurons from the hip CA region 1', - 'exCA1': 'Pyramidal neurons from the hip CA region 2', - 'ODC1': 'Oligodendrocytes', - 'ASC1': 'Astrocytes 1', - 'OPC': 'Oligodendrocyte precursors', - 'ASC2': 'Astrocytes 2', - 'Unclassified': 'Unknown', - 'MG': 'Microglia', - 'NSC': 'Neuronal stem cells', - 'END': 'Endothelial cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/nmeth.4407" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py deleted file mode 100644 index a3da7a14e..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py deleted file mode 100644 index ffa89b995..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py deleted file mode 100644 index d06a74c5e..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py deleted file mode 100644 index a5e5d3798..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'AdultTemporalLobe' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', 
- 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_AdultTemporalLobe_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py deleted file mode 100644 index da1d294fc..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_6.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py deleted file mode 100644 index 0add7c561..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'AdultCerebellum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_AdultCerebellum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/calvaria/__init__.py b/sfaira/data/human/calvaria/__init__.py deleted file mode 100644 index dfad96236..000000000 --- a/sfaira/data/human/calvaria/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_calvaria import DatasetGroupCalvaria diff --git a/sfaira/data/human/calvaria/external.py b/sfaira/data/human/calvaria/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/calvaria/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/calvaria/human_calvaria.py b/sfaira/data/human/calvaria/human_calvaria.py deleted 
file mode 100644 index bd37cd357..000000000 --- a/sfaira/data/human/calvaria/human_calvaria.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_calvaria_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupCalvaria(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCalvaria - self.datasets.update(DatasetGroupCalvaria(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py deleted file mode 100644 index 6c18f11c7..000000000 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'calvaria' - self.sub_tissue = 'FetalCalvaria' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "calvaria", "hcl_FetalCalvaria_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/cervix/__init__.py b/sfaira/data/human/cervix/__init__.py deleted file mode 100644 index 5b71011e8..000000000 --- a/sfaira/data/human/cervix/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_cervix import DatasetGroupCervix diff --git a/sfaira/data/human/cervix/external.py b/sfaira/data/human/cervix/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/cervix/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/cervix/human_cervix.py b/sfaira/data/human/cervix/human_cervix.py deleted file mode 100644 index 9468a333b..000000000 --- a/sfaira/data/human/cervix/human_cervix.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_cervix_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupCervix(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCervix - self.datasets.update(DatasetGroupCervix(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py deleted file mode 100644 index 400d4808f..000000000 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'cervix' - self.sub_tissue = 'AdultCervix' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "cervix", "hcl_AdultCervix_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/chorionicvillus/__init__.py b/sfaira/data/human/chorionicvillus/__init__.py deleted file mode 100644 index 1265da611..000000000 --- a/sfaira/data/human/chorionicvillus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_chorionicvillus import DatasetGroupChorionicvillus diff --git a/sfaira/data/human/chorionicvillus/external.py b/sfaira/data/human/chorionicvillus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/chorionicvillus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py deleted file mode 100644 index 030412ab7..000000000 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_chorionicvillus_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupChorionicvillus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupChorionicvillus - self.datasets.update(DatasetGroupChorionicvillus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py deleted file mode 100644 index 1c9bfb424..000000000 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import 
os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'chorionicvillus' - self.sub_tissue = 'ChorionicVillus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "chorionicvillus", "hcl_ChorionicVillus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated 
- self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/colon/__init__.py b/sfaira/data/human/colon/__init__.py deleted file mode 100644 index 79ba22cee..000000000 --- a/sfaira/data/human/colon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_colon import DatasetGroupColon diff --git a/sfaira/data/human/colon/external.py b/sfaira/data/human/colon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/colon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/colon/human_colon.py b/sfaira/data/human/colon/human_colon.py deleted file mode 100644 index fa2642d40..000000000 --- a/sfaira/data/human/colon/human_colon.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_colon_2019_10x_kinchen_001 import Dataset as Dataset0001 -from .human_colon_2019_10x_smilie_001 import Dataset as Dataset0002 -from .human_colon_2019_10x_wang_001 import Dataset as Dataset0003 -from .human_colon_2020_10x_james_001 import Dataset as Dataset0004 -from .human_colon_2020_microwell_han_001 import Dataset as Dataset0005 -from .human_colon_2020_microwell_han_002 import Dataset as Dataset0006 -from .human_colon_2020_microwell_han_003 import Dataset as Dataset0007 -from .human_colon_2020_microwell_han_004 import Dataset as Dataset0008 - - -class DatasetGroupColon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - 
Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py deleted file mode 100644 index 616dc5927..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ /dev/null @@ -1,150 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class and obtain cell type annotations ('hc_meta_data_stromal_with_donor.txt' and - 'uc_meta_data_stromal_with_donor.txt') directly from the authors of the paper. For (up - to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the self.load() method. 
For - this, you need to preprocess the raw files as below and place the resulting h5ad file in the data folder of this - organ: - - import anndata - import pandas as pd - - adata = anndata.read_loom('f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom') - ctuc = pd.read_csv('uc_meta_data_stromal_with_donor.txt', sep='\t') - cthealthy = pd.read_csv('hc_meta_data_stromal_with_donor.txt', sep='\t') - - adata = adata[adata.obs['emptydrops_is_cell'] == 't'].copy() - adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() - - uc = adata[adata.obs['donor_organism.diseases.ontology_label'] == "ulcerative colitis (disease)"].copy() - bcuc = [i.split('-')[0] for i in ctuc['Barcode']] - seluc = [] - for i in uc.obs['barcode']: - seluc.append((uc.obs['barcode'].str.count(i).sum() == 1) and i in bcuc) - uc = uc[seluc].copy() - ctuc.index = [i.split('-')[0] for i in ctuc['Barcode']] - uc.obs['celltype'] = [ctuc.loc[i]['Cluster'] for i in uc.obs['barcode']] - uc.var = uc.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - - healthy = adata[adata.obs['donor_organism.diseases.ontology_label'] == "normal"].copy() - bchealthy = [i.split('-')[0] for i in cthealthy['Barcode']] - selhealthy = [] - for i in healthy.obs['barcode']: - selhealthy.append((healthy.obs['barcode'].str.count(i).sum() == 1) and i in bchealthy) - healthy = healthy[selhealthy].copy() - cthealthy.index = [i.split('-')[0] for i in cthealthy['Barcode']] - healthy.obs['celltype'] = [cthealthy.loc[i]['Cluster'] for i in healthy.obs['barcode']] - healthy.var = healthy.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - - adata = healthy.concatenate(uc) - adata.write('kinchenetal.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - 
self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" - self.download_website_meta = 'private' - self.organ = "colon" - self.sub_tissue = "lamina propria of mucosa of colon" - self.annotated = True - - self.class_maps = { - "0": { - "Endothelial 1": "Endothelial", - "Endothelial 2": "Endothelial", - "Glial": "Glial cells", - "Myofibroblasts": "Myofibroblasts", - "Pericyte 1": "Pericytes", - "Pericyte 2": "Pericytes", - "Pericytes": "Pericytes", - "Plasma Cells": "Plasma Cells", - "Smooth Muscle": "Smooth Muscle", - "Stromal 1": "Stromal", - "Stromal 2a": "Stromal", - "Stromal 2b": "Stromal", - "Stromal 3": "Stromal", - "Stromal 4": "Stromal", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") - ] - adata = anndata.read_loom(fn[0]) - ctuc = pd.read_csv(fn[1], sep='\t') - cthealthy = pd.read_csv(fn[2], sep='\t') - adata = adata[adata.obs['emptydrops_is_cell'] == 't'].copy() - adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() - uc = adata[adata.obs['donor_organism.diseases.ontology_label'] == "ulcerative colitis (disease)"].copy() - bcuc = [i.split('-')[0] for i in ctuc['Barcode']] - seluc = [] - for i in uc.obs['barcode']: - seluc.append((uc.obs['barcode'].str.count(i).sum() == 1) and i in bcuc) - uc = uc[seluc].copy() - ctuc.index = [i.split('-')[0] for i in ctuc['Barcode']] - uc.obs['celltype'] = [ctuc.loc[i]['Cluster'] for i in uc.obs['barcode']] - uc.var = 
uc.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - healthy = adata[adata.obs['donor_organism.diseases.ontology_label'] == "normal"].copy() - bchealthy = [i.split('-')[0] for i in cthealthy['Barcode']] - selhealthy = [] - for i in healthy.obs['barcode']: - selhealthy.append((healthy.obs['barcode'].str.count(i).sum() == 1) and i in bchealthy) - healthy = healthy[selhealthy].copy() - cthealthy.index = [i.split('-')[0] for i in cthealthy['Barcode']] - healthy.obs['celltype'] = [cthealthy.loc[i]['Cluster'] for i in healthy.obs['barcode']] - healthy.var = healthy.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - self.adata = healthy.concatenate(uc) - - else: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "kinchenetal.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Simmons' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.08.067" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [line == 'normal' for line in - self.adata.obs['donor_organism.diseases.ontology_label']] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact]\ - .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col='Accession') diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py deleted file mode 100644 index 3f5493506..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ /dev/null @@ -1,106 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. This dataloader only provides the subset of the published sata which has been made available through the - covid-19 Cell Atlas. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - self.download_website = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colonic epithelium" - self.annotated = True - - self.class_maps = { - "0": { - 'Cycling TA': 'Cycling TA', - 'TA 1': 'TA 1', - 'TA 2': 'TA 2', - 'Immature Enterocytes 2': 'Immature Enterocytes 2', - 'Immature Enterocytes 1': 'Immature Enterocytes 1', - 'Enterocyte Progenitors': 'Enterocyte Progenitors', - 'Immature Goblet': 'Immature Goblet', - 'Enterocytes': 'Enterocytes', - 'Secretory TA': 'Secretory TA', - 'Best4+ Enterocytes': 'Best4+ Enterocytes', - 'CD8+ IELs': 'CD8+ IELs', - 'Goblet': 'Goblet cells', - 'Stem': 'Stem cells', 
- 'Tuft': 'Tuft', - 'Follicular': 'Follicular', - 'Enteroendocrine': 'Enteroendocrine cells', - 'Plasma': 'Plasma Cells', - 'CD4+ Memory': 'CD4+ Memory', - 'CD8+ LP': 'CD8+ LP', - 'CD69- Mast': 'CD69- Mast', - 'Macrophages': 'Macrophage', - 'GC': 'Glial cells', - 'Cycling B': 'B cell cycling', - 'CD4+ Activated Fos-hi': 'CD4+ T Activated Fos-hi', - 'CD4+ Activated Fos-lo': 'CD4+ T Activated Fos-lo', - 'NKs': 'NK', - 'Cycling T': 'Cycling T', - 'M cells': 'M cells', - 'CD69+ Mast': 'CD69+ Mast', - 'MT-hi': 'MT-hi', - 'CD8+ IL17+': 'CD8+ IL17+', - 'CD4+ PD1+': 'CD4+ PD1+', - 'DC2': 'DC2', - 'Treg': 'Treg', - 'ILCs': 'ILC', - 'DC1': 'DC1', - 'WNT2B+ Fos-lo 1': 'WNT2B+ Fos-lo 1', - 'WNT5B+ 2': 'WNT5B+ 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.06.029" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py deleted file mode 100644 index 6ab9ba0bd..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Enterocyte Progenitors', - 'Enterocyte': 'Enterocytes', - 'Goblet': 'Goblet cells', - 'TA': 'TA', - 'Paneth-like': 'Paneth cells', - 'Stem Cell': 'Stem cells', - 'Enteriendocrine': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py deleted file mode 100644 index 0bd47f057..000000000 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. This dataloader only provides the subset of the published sata which has been made available through the - covid-19 Cell Atlas. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" - self.download_website = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colonic immune cells" - self.annotated = True - - self.class_maps = { - "0": { - 'Activated CD4 T': 'Activated CD4 T', - 'B cell IgA Plasma': 'B cell IgA Plasma', - 'B cell IgG Plasma': 'B cell IgG Plasma', - 'B cell cycling': 'B cell cycling', - 'B cell memory': 'B cell memory', - 'CD8 T': 'CD8 T', - 'Follicular B cell': 'Follicular', - 'ILC': 'ILC', - 'LYVE1 Macrophage': 'LYVE1 Macrophage', - 'Lymphoid DC': 'Lymphoid DC', - 'Macrophage': 'Macrophage', - 'Mast': 'Mast cell', - 'Monocyte': 'Monocyte', - 'NK': 'NK', - 'Tcm': 'Tcm', - 'Tfh': 'Tfh', - 'Th1': 'Th1', - 'Th17': 'Th17', - 'Treg': 'Treg', - 'cDC1': 'DC1', - 'cDC2': 'DC2', - 'cycling DCs': 'cycling DCs', - 'cycling gd T': 'cycling gd T', - 'gd T': 'gd T', - 'pDC': 'pDC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41590-020-0602-z" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py deleted file mode 100644 index c83fd925f..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultAscendingColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultAscendingColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py deleted file mode 100644 index 3cd8d5469..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultTransverseColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py deleted file mode 100644 index 56d1f309a..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultTransverseColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py deleted file mode 100644 index acdcef798..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultSigmoidColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/duodenum/__init__.py b/sfaira/data/human/duodenum/__init__.py deleted file mode 100644 index b8a98c3be..000000000 --- a/sfaira/data/human/duodenum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_duodenum import DatasetGroupDuodenum diff --git a/sfaira/data/human/duodenum/external.py b/sfaira/data/human/duodenum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/duodenum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/duodenum/human_duodenum.py b/sfaira/data/human/duodenum/human_duodenum.py deleted file mode 100644 index c4d3b8bba..000000000 --- a/sfaira/data/human/duodenum/human_duodenum.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from 
.human_duodenum_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupDuodenum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupDuodenum - self.datasets.update(DatasetGroupDuodenum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py deleted file mode 100644 index be0e66b06..000000000 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'duodenum' - self.sub_tissue = 'AdultDuodenum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "duodenum", "hcl_AdultDuodenum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/epityphlon/__init__.py b/sfaira/data/human/epityphlon/__init__.py deleted file mode 100644 index 1463f978a..000000000 --- a/sfaira/data/human/epityphlon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_epityphlon import DatasetGroupEpityphlon diff --git a/sfaira/data/human/epityphlon/external.py b/sfaira/data/human/epityphlon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/epityphlon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/epityphlon/human_epityphlon.py b/sfaira/data/human/epityphlon/human_epityphlon.py deleted file mode 100644 index 3330e03bf..000000000 --- a/sfaira/data/human/epityphlon/human_epityphlon.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_epityphlon_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupEpityphlon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEpityphlon - self.datasets.update(DatasetGroupEpityphlon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py deleted file mode 100644 index e68044b9e..000000000 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a 
dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'epityphlon' - self.sub_tissue = 'AdultEpityphlon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "epityphlon", "hcl_AdultEpityphlon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = 
self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/esophagus/__init__.py b/sfaira/data/human/esophagus/__init__.py deleted file mode 100644 index 8dc074247..000000000 --- a/sfaira/data/human/esophagus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_esophagus import DatasetGroupEsophagus diff --git a/sfaira/data/human/esophagus/external.py b/sfaira/data/human/esophagus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/esophagus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/esophagus/human_esophagus.py b/sfaira/data/human/esophagus/human_esophagus.py deleted file mode 100644 index 2e3df391e..000000000 --- a/sfaira/data/human/esophagus/human_esophagus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_esophagus_2019_10x_madissoon_001 import Dataset as Dataset0001 -from .human_esophagus_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_esophagus_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupEsophagus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEsophagus - self.datasets.update(DatasetGroupEsophagus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py 
b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py deleted file mode 100644 index 14c097816..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2019_10x_madissoon_001_10.1101/741405" - self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" - # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 - self.download_website_meta = None - self.organ = "esophagus" - self.sub_tissue = "esophagus" - self.annotated = True - - self.class_maps = { - "0": { - "B_CD27neg": "B_CD27neg", - "B_CD27pos": "B_CD27pos", - "Blood_vessel": "Blood_vessel", - "Dendritic_Cells": "Dendritic cell", - "Epi_basal": "Basal cell", - "Epi_dividing": "Epi_dividing", - "Epi_stratified": "Stratified epithelial cell", - "Epi_suprabasal": "Epi_suprabasal", - "Epi_upper": "Epi_upper", - "Glands_duct": "Glands_duct", - "Glands_mucous": "Glands_mucous", - "Lymph_vessel": "Lymph_vessel", - "Mast_cell": "Mast cell", - "Mono_macro": "Mono_macro", - "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", - "Stroma": "Stromal cell", - "T_CD4": "T_CD4", - "T_CD8": "T_CD8", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or 
not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7413619', - new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) \ No newline at end of file diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py deleted file mode 100644 index 860dabe23..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Esophagus' - self.sub_tissue = 'AdultEsophagus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Basal cell': 'Basal cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'T cell': 'T cell', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Stromal cell': 'Stromal cell', - 'Monocyte': 'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Fetal stromal cell': 'Fetal stromal cell', - 'CB CD34+': 'CB CD34+', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Loop of Henle': 'Loop of Henle', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py deleted file mode 100644 index ab5d04b70..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Esophagus' - self.sub_tissue = 'AdultEsophagus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Basal cell': 'Basal cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'T cell': 'T cell', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Stromal cell': 'Stromal cell', - 'Monocyte': 'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Fetal stromal cell': 'Fetal stromal cell', - 'CB CD34+': 'CB CD34+', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Loop of Henle': 'Loop of Henle', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/eye/__init__.py b/sfaira/data/human/eye/__init__.py deleted file mode 100644 index 345236753..000000000 --- a/sfaira/data/human/eye/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_eye import DatasetGroupEye diff --git a/sfaira/data/human/eye/external.py b/sfaira/data/human/eye/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/eye/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/eye/human_eye.py b/sfaira/data/human/eye/human_eye.py deleted file mode 100644 index 5a3c43f93..000000000 --- a/sfaira/data/human/eye/human_eye.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_eye_2019_10x_lukowski_001 import Dataset as Dataset0001 -from 
.human_eye_2019_10x_menon_001 import Dataset as Dataset0002 -from .human_eye_2019_10x_voigt_001 import Dataset as Dataset0003 -from .human_eye_2020_microwell_han_001 import Dataset as Dataset0004 - - -class DatasetGroupEye(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEye - self.datasets.update(DatasetGroupEye(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py deleted file mode 100644 index f73d4e388..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'Muller cell': 'Muller cell', - 'amacrine cell': 'Amacrine cell', - 'microglial cell': 'Microglia', - 'retinal bipolar neuron type A': 'Retinal bipolar neuron type A', - 'retinal bipolar neuron type B': 'Retinal bipolar neuron type B', - 'retinal bipolar neuron type C': 'Retinal bipolar neuron type C', - 'retinal bipolar neuron type D': 'Retinal bipolar neuron type D', - 'retinal cone cell': 'Retinal cone cell', - 'retinal ganglion cell': 'Retinal ganglion cell', - 'retinal rod cell type A': 'Retinal rod cell type A', - 'retinal rod cell type B': 'Retinal rod cell type B', - 'retinal rod cell type C': 'Retinal rod cell type C', - 'unannotated': 'Unknown', - 'unspecified': 'Unknown', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Wong' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.15252/embj.2018100811' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
'10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py deleted file mode 100644 index 5701ac731..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" - self.download_website = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'ACs': 'Amacrine cell', - 'BPs': 'BPs', - 'Cones': 'Retinal cone cell', - 'Endo': 'Endothelial cell', - 'HCs': 'Horizontal cells', - 'Macroglia': 'Macroglia', - 'Microglia': 'Microglia', - 'RGCs': 'Retinal ganglion cell', - 'Rods': 'Rods', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Hafler' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-12780-8' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py deleted file mode 100644 index 31dfce8a7..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" - self.download_website = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'B-cell': 'B-cell', - 'Endothelial': 'Endothelial cell', - 'Fibroblast': 'Fibroblast', - 'Macrophage': 'Macrophage', - 'Mast-cell': 'Mast-cell', - 'Melanocyte': 'Melanocyte', - 'Pericyte': 'Pericyte', - 'RPE': 'Retinal pigment epithelium', - 'Schwann1': 'Schwann1', - 'Schwann2': 'Schwann2', - 'T/NK-cell': 'T/NK-cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mullins' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1073/pnas.1914143116' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py deleted file mode 100644 index 88f2468c5..000000000 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Eye' - self.sub_tissue = 'FetalEyes' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fetal neuron': 'Fetal neuron', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Endothelial cell': 'Endothelial cell', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal Neuron': 'Fetal neuron', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Dendritic cell': 'Dendritic cell', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Basal cell': 'Basal cell', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'CB CD34+': 'CB CD34_pos', - 'hESC': 'hESC' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "hcl_FetalEyes_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/fallopiantube/__init__.py b/sfaira/data/human/fallopiantube/__init__.py deleted file mode 100644 index 4f16e3956..000000000 --- a/sfaira/data/human/fallopiantube/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_fallopiantube import DatasetGroupFallopiantube diff --git a/sfaira/data/human/fallopiantube/external.py b/sfaira/data/human/fallopiantube/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/fallopiantube/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube.py b/sfaira/data/human/fallopiantube/human_fallopiantube.py deleted file mode 100644 index cff1f8131..000000000 --- a/sfaira/data/human/fallopiantube/human_fallopiantube.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase 
- -from .human_fallopiantube_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupFallopiantube(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFallopiantube - self.datasets.update(DatasetGroupFallopiantube(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py deleted file mode 100644 index cd3d107e2..000000000 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'fallopiantube' - self.sub_tissue = 'AdultFallopiantube' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "fallopiantube", "hcl_AdultFallopiantube_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/femalegonad/__init__.py b/sfaira/data/human/femalegonad/__init__.py deleted file mode 100644 index bbb59f91d..000000000 --- a/sfaira/data/human/femalegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_femalegonad import DatasetGroupFemalegonad diff --git a/sfaira/data/human/femalegonad/external.py b/sfaira/data/human/femalegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/femalegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/femalegonad/human_femalegonad.py b/sfaira/data/human/femalegonad/human_femalegonad.py deleted file mode 100644 index 790322e71..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_femalegonad_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_femalegonad_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupFemalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py deleted file mode 100644 index 78b0e1cc5..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ /dev/null @@ 
-1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'femalegonad' - self.sub_tissue = 'FetalFemaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py deleted file mode 100644 index 45c00bf50..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'femalegonad' - self.sub_tissue = 'FetalFemaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_1.h5ad") - self.adata = anndata.read(fn) - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/gallbladder/__init__.py b/sfaira/data/human/gallbladder/__init__.py deleted file mode 100644 index de13546c3..000000000 --- a/sfaira/data/human/gallbladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_gallbladder import DatasetGroupGallbladder diff --git a/sfaira/data/human/gallbladder/external.py b/sfaira/data/human/gallbladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/gallbladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/gallbladder/human_gallbladder.py b/sfaira/data/human/gallbladder/human_gallbladder.py deleted file mode 100644 index aa015fe75..000000000 --- a/sfaira/data/human/gallbladder/human_gallbladder.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_gallbladder_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupGallbladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: 
Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupGallbladder - self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py deleted file mode 100644 index 3d685e0f3..000000000 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'gallbladder' - self.sub_tissue = 'AdultGallbladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - 
fn = os.path.join(self.path, "human", "gallbladder", "hcl_AdultGallbladder_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/heart/__init__.py b/sfaira/data/human/heart/__init__.py deleted file mode 100644 index b6fe327b0..000000000 --- a/sfaira/data/human/heart/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_heart import DatasetGroupHeart diff --git a/sfaira/data/human/heart/external.py b/sfaira/data/human/heart/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/heart/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/heart/human_heart.py b/sfaira/data/human/heart/human_heart.py deleted file mode 100644 index a1c2195a7..000000000 --- a/sfaira/data/human/heart/human_heart.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_heart_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_heart_2020_microwell_han_002 import Dataset as Dataset0002 -from 
.human_heart_2020_microwell_han_003 import Dataset as Dataset0003 -from .human_heart_2020_microwell_han_004 import Dataset as Dataset0004 - - -class DatasetGroupHeart(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py deleted file mode 100644 index 325d4e08e..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'FetalHeart' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py deleted file mode 100644 index bc8eeb41a..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'AdultHeart' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py deleted file mode 100644 index 01fbee187..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'AdultHeart' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py deleted file mode 100644 index 24b48e6bf..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'FetalHeart' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/hesc/__init__.py b/sfaira/data/human/hesc/__init__.py deleted file mode 100644 index 741b9caa4..000000000 --- a/sfaira/data/human/hesc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_hesc import DatasetGroupHesc diff --git a/sfaira/data/human/hesc/external.py b/sfaira/data/human/hesc/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/hesc/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/hesc/human_hesc.py b/sfaira/data/human/hesc/human_hesc.py deleted file mode 100644 index ffc258d70..000000000 --- a/sfaira/data/human/hesc/human_hesc.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_hesc_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupHesc(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHesc - self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py 
b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py deleted file mode 100644 index 4553eedae..000000000 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'hesc' - self.sub_tissue = 'HESC' - self.dev_stage = 'HESC' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "hesc", "hcl_HESC_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/ileum/__init__.py b/sfaira/data/human/ileum/__init__.py deleted file mode 100644 index cb7ce42d4..000000000 --- a/sfaira/data/human/ileum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_ileum import DatasetGroupIleum diff --git a/sfaira/data/human/ileum/external.py b/sfaira/data/human/ileum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/ileum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ileum/human_ileum.py b/sfaira/data/human/ileum/human_ileum.py deleted file mode 100644 index 53c884c69..000000000 --- a/sfaira/data/human/ileum/human_ileum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_ileum_2019_10x_martin_001 import Dataset as Dataset0001 -from .human_ileum_2019_10x_wang_001 import Dataset as Dataset0002 -from .human_ileum_2020_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupIleum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupIleum - 
self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py deleted file mode 100644 index 1de123a07..000000000 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" - self.download_website = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" - self.download_website_meta = None - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'T cells': 'T cells', - 'Plasma cells': 'Plasma Cells', - 'B cells': 'B cells', - 'MNP': 'MNP', - 'ILC': 'ILC', - 'Enterocytes': 'Enterocytes', - 'Fibs': 'Fibroblasts', - 'CD36+ endothelium': 'CD36+ endothelium', - 'Progenitors': 'Progenitors', - 'Goblets': 'Goblet cells', - 'Glial cells': 'Glial cells', - 'Cycling': 'Cycling', - 'ACKR1+ endothelium': 'ACKR1+ endothelium', - 'Pericytes': 'Pericytes', - 'Lymphatics': 'Lymphatics', - 'Mast cells': 'Mast cells', - 'SM': 'Smooth muscle cell', - 'TA': 'TA', - 'Paneth cells': 'Paneth cells', - 'Enteroendocrines': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Kenigsberg" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.08.008" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py deleted file mode 100644 index 9e9a6a79f..000000000 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" - self.download_website_meta = None - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Progenitors', - 'Goblet': 'Goblet cells', - 'Enterocyte': 'Enterocytes', - 'Paneth-like': 'Paneth cells', - 'Stem Cell': 'Stem Cell', - 'TA': 'TA', - 'Enteriendocrine': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py deleted file mode 100644 index db65ce9bb..000000000 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'ileum' - self.sub_tissue = 'AdultIleum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'B cells', - 'B cell (Plasmocyte)': 'Plasma Cells', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte': 'Enterocytes', - 'Enterocyte progenitor': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 
'Fetal Neuron': 'Fetal neuron', - 'Fetal enterocyte': 'Enterocytes', - 'Fetal epithelial progenitor': 'Progenitors', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblasts', - 'Hepatocyte/Endodermal cell': 'Hepatocyte/Endodermal cell', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cells', - 'Monocyte': 'Monocyte', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'T cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "hcl_AdultIleum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/jejunum/__init__.py b/sfaira/data/human/jejunum/__init__.py deleted file mode 100644 index dd9a3acd4..000000000 --- 
a/sfaira/data/human/jejunum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_jejunum import DatasetGroupJejunum diff --git a/sfaira/data/human/jejunum/external.py b/sfaira/data/human/jejunum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/jejunum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/jejunum/human_jejunum.py b/sfaira/data/human/jejunum/human_jejunum.py deleted file mode 100644 index 0d5dba57e..000000000 --- a/sfaira/data/human/jejunum/human_jejunum.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_jejunum_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupJejunum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupJejunum - self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py deleted file mode 100644 index 48507f945..000000000 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ /dev/null @@ -1,60 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'jejunum' - self.sub_tissue = 'AdultJejunum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "jejunum", "hcl_AdultJejunum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/kidney/__init__.py b/sfaira/data/human/kidney/__init__.py deleted file mode 100644 index 4101ed3c2..000000000 --- a/sfaira/data/human/kidney/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_kidney import DatasetGroupKidney diff --git a/sfaira/data/human/kidney/external.py b/sfaira/data/human/kidney/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/kidney/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/kidney/human_kidney.py b/sfaira/data/human/kidney/human_kidney.py deleted file mode 100644 index 9d4b2e4d5..000000000 --- a/sfaira/data/human/kidney/human_kidney.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_kidney_2019_10xSn_lake_001 import Dataset as Dataset0001 -from .human_kidney_2019_10x_stewart_001 import Dataset as Dataset0002 -from .human_kidney_2020_10x_liao_001 import Dataset as Dataset0003 -from .human_kidney_2020_microwell_han_001 import Dataset as Dataset0004 -from .human_kidney_2020_microwell_han_002 import Dataset as Dataset0005 -from .human_kidney_2020_microwell_han_003 import Dataset as Dataset0006 -from .human_kidney_2020_microwell_han_004 import Dataset as Dataset0007 -from .human_kidney_2020_microwell_han_005 import Dataset as Dataset0008 -from .human_kidney_2020_microwell_han_006 import Dataset as Dataset0009 -from .human_kidney_2020_microwell_han_007 import Dataset as Dataset0010 - - -class DatasetGroupKidney(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - 
Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py deleted file mode 100644 index 6cdc98279..000000000 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ /dev/null @@ -1,95 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data files which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Collecting Duct - Intercalated Cells Type A (cortex)': 'Collecting Duct - Intercalated Cells Type A (cortex)', - 'Collecting Duct - Intercalated Cells Type A (medulla)': 'Collecting Duct - Intercalated Cells Type A (medulla)', - 'Collecting Duct - Intercalated Cells Type B': 'Collecting Duct - Intercalated Cells Type B', - 'Collecting Duct - PCs - Stressed Dissoc Subset': 'Collecting Duct - PCs - Stressed Dissoc Subset', - 'Collecting Duct - Principal Cells (cortex)': 'Collecting Duct - Principal Cells (cortex)', - 'Collecting Duct - Principal Cells (medulla)': 'Collecting Duct - Principal Cells (medulla)', - 'Connecting Tubule': 'Connecting tubule', - 'Decending Limb': 'Decending Limb', - 'Distal Convoluted Tubule': 'Distal Convoluted Tubule', - 'Endothelial Cells (unassigned)': 'Endothelial Cells (unassigned)', - 'Endothelial Cells - AEA & DVR ': 'Endothelial Cells - AEA & DVR', - 'Endothelial Cells - AVR': 'Endothelial Cells - AVR', - 'Endothelial Cells - glomerular capillaries': 'Endothelial Cells - glomerular capillaries', - 'Epithelial Cells (unassigned)': 'Epithelial Cells (unassigned)', - 'Immune Cells - Macrophages': 'Macrophage', - 'Interstitium': 
'Interstitium', - 'Mesangial Cells': 'Mesangial Cells', - 'Podocytes': 'Podocyte', - 'Proximal Tubule Epithelial Cells (S1)': 'Proximal Tubule Epithelial Cells (S1)', - 'Proximal Tubule Epithelial Cells (S2)': 'Proximal Tubule Epithelial Cells (S2)', - 'Proximal Tubule Epithelial Cells (S3)': 'Proximal Tubule Epithelial Cells (S3)', - 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3 )': 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', - 'Proximal Tubule Epithelial Cells - Stress/Inflam': 'Proximal Tubule Epithelial Cells - Stress/Inflam', - 'Thick Ascending Limb': 'Thick ascending limb of Loop of Henle', - 'Thin ascending limb': 'Thin ascending limb', - 'Unknown - Novel PT CFH+ Subpopulation (S2)': 'Unknown - Novel PT CFH+ Subpopulation (S2)', - 'Vascular Smooth Muscle Cells and pericytes': 'Vascular Smooth Muscle Cells and pericytes', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) - annot = pd.read_csv(fn[1], index_col=0, dtype='category') - self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Jain' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-10861-2' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10xSn' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py deleted file mode 100644 index 3c97f7a87..000000000 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ /dev/null @@ -1,145 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the two raw data files which can be obtained from the `download_website` - attribute of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" - self.download_website = [ - 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad', - 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad' - ] - self.download_website_meta = None - self.organ = "kidney" - self.sub_tissue = "renal medulla, renal pelvis, ureter, cortex of kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Ascending vasa recta endothelium': 'Endothelial Cells - AVR', - 'B cell': 'B cell', - 'CD4 T cell': 'CD4 T cell', - 'CD8 T cell': 'CD8 T cell', - 'CNT/PC - proximal UB': 'CNT/PC - proximal UB', - 'Cap mesenchyme': 'Cap mesenchyme', - 'Connecting tubule': 'Connecting tubule', - 'Descending vasa recta endothelium': 'Endothelial Cells - AEA & DVR', - 'Distal S shaped body': 'Distal S shaped body', - 'Distal renal vesicle': 'Distal renal vesicle', - 'Distinct proximal tubule 1': 'Distinct proximal tubule 1', - 'Distinct proximal tubule 2': 'Distinct proximal tubule 2', - 'Endothelium': 'Endothelial Cells (unassigned)', - 'Epithelial progenitor cell': 'Epithelial progenitor', - 'Erythroid': 'Erythroid', - 'Fibroblast': 'Fibroblast', - 'Fibroblast 1': 'Fibroblast', - 'Fibroblast 2': 'Fibroblast', - 'Glomerular endothelium': 'Endothelial Cells - glomerular capillaries', - 'Indistinct intercalated cell': 'Indistinct intercalated cell', - 'Innate like lymphocyte': 'Innate like lymphocyte', - 'Loop of Henle': 'Loop of Henle', - 'MNP-a/classical monocyte derived': 'MNP-a/classical monocyte derived', - 'MNP-b/non-classical monocyte derived': 'MNP-b/non-classical monocyte derived', - 'MNP-c/dendritic cell': 'MNP-c/dendritic cell', - 'MNP-d/Tissue macrophage': 'MNP-d/Tissue 
macrophage', - 'Macrophage 1': 'Macrophage', - 'Macrophage 2': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Mast cells': 'Mast cell', - 'Medial S shaped body': 'Medial S shaped body', - 'Megakaryocyte': 'Megakaryocyte', - 'Monocyte': 'Monocyte', - 'Myofibroblast': 'Myofibroblast', - 'Myofibroblast 1': 'Myofibroblast', - 'Myofibroblast 2': 'Myofibroblast', - 'NK cell': 'NK cell', - 'NKT cell': 'NKT cell', - 'Neuron': 'Neuron', - 'Neutrophil': 'Neutrophil', - 'Pelvic epithelium': 'Pelvic epithelium', - 'Pelvic epithelium - distal UB': 'Pelvic epithelium - distal UB', - 'Peritubular capillary endothelium 1': 'Peritubular capillary endothelium 1', - 'Peritubular capillary endothelium 2': 'Peritubular capillary endothelium 2', - 'Plasmacytoid dendritic cell': 'Plasmacytoid dendritic cell', - 'Podocyte': 'Podocyte', - 'Principal cell': 'Principal cell', - 'Proliferating B cell': 'Proliferating B cell', - 'Proliferating NK cell': 'Proliferating NK cell', - 'Proliferating Proximal Tubule': 'Proliferating Proximal Tubule', - 'Proliferating cDC2': 'Proliferating cDC2', - 'Proliferating cap mesenchyme': 'Proliferating cap mesenchyme', - 'Proliferating distal renal vesicle': 'Proliferating distal renal vesicle', - 'Proliferating fibroblast': 'Proliferating fibroblast', - 'Proliferating macrophage': 'Proliferating macrophage', - 'Proliferating monocyte': 'Proliferating monocyte', - 'Proliferating myofibroblast': 'Proliferating myofibroblast', - 'Proliferating stroma progenitor': 'Proliferating stroma progenitor', - 'Proximal S shaped body': 'Proximal S shaped body', - 'Proximal UB': 'Proximal UB', - 'Proximal renal vesicle': 'Proximal renal vesicle', - 'Proximal tubule': 'Proximal tubule', - 'Stroma progenitor': 'Stroma progenitor', - 'Thick ascending limb of Loop of Henle': 'Thick ascending limb of Loop of Henle', - 'Transitional urothelium': 'Transitional urothelium', - 'Type A intercalated cell': 'Type A intercalated cell', - 'Type B intercalated cell': 'Collecting Duct - 
Intercalated Cells Type B', - 'cDC1': 'cDC1', - 'cDC2': 'cDC2', - 'pDC': 'pDC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), - os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") - ] - adult = anndata.read(fn[0]) - fetal = anndata.read(fn[1]) - adult.obs['development'] = 'adult' - fetal.obs['development'] = 'fetal' - self.adata = adult.concatenate(fetal) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Clatworthy' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1126/science.aat5031' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] - self.adata.obs["cell_ontology_id"] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID') diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py deleted file mode 100644 index 99a147d75..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ /dev/null @@ -1,115 +0,0 @@ -import anndata -import os -from 
typing import Union -from .external import DatasetBase -import pandas as pd -import scipy.io -import gzip -import tarfile - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import pandas as pd - import scipy.io - import gzip - import tarfile - adatas = [] - with tarfile.open("GSE131685_RAW.tar") as tar: - for member in tar.getmembers(): - if '_matrix.mtx.gz' in member.name: - name = '_'.join(member.name.split('_')[:-1]) - with gzip.open(tar.extractfile(member), 'rb') as mm: - X = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name+'_barcodes.tsv.gz'), compression='gzip', header=None, sep='\t', index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name+'_features.tsv.gz'), compression='gzip', header=None, sep='\t').iloc[:,:2] - var.columns = ['ensembl', 'names'] - var.index = var['ensembl'].values - adata = anndata.AnnData(X=X, obs=obs, var=var) - adata.obs['sample'] = name - adatas.append(adata) - adata = adatas[0].concatenate(adatas[1:]) - del adata.obs['batch'] - adata.write('GSE131685.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" - self.download_website = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" - self.download_website_meta = None - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") - adatas = [] - with tarfile.open(fn) as tar: - for member in tar.getmembers(): - if '_matrix.mtx.gz' in member.name: - name = '_'.join(member.name.split('_')[:-1]) - with gzip.open(tar.extractfile(member), 'rb') as mm: - X = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name + '_barcodes.tsv.gz'), compression='gzip', header=None, - sep='\t', index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name + '_features.tsv.gz'), compression='gzip', header=None, - sep='\t').iloc[:, :2] - var.columns = ['ensembl', 'names'] - var.index = var['ensembl'].values - self.adata = anndata.AnnData(X=X, obs=obs, var=var) - self.adata.obs['sample'] = name - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:]) - del self.adata.obs['batch'] - - else: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "GSE131685.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41597-019-0351-8' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py deleted file mode 100644 index ffea57f1b..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 
'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is 
None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py deleted file mode 100644 index 353dae669..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py deleted file mode 100644 index 94c6bf3b8..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py deleted file mode 100644 index 264ec8857..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py deleted file mode 100644 index 68079fd52..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py deleted file mode 100644 index 10699d7a8..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py deleted file mode 100644 index 91667d873..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_6.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/__init__.py b/sfaira/data/human/liver/__init__.py deleted file mode 100644 index 2014f8490..000000000 --- a/sfaira/data/human/liver/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_liver import DatasetGroupLiver diff --git a/sfaira/data/human/liver/external.py b/sfaira/data/human/liver/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/liver/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/liver/human_liver.py b/sfaira/data/human/liver/human_liver.py deleted file mode 100644 index 278a6a4dc..000000000 --- a/sfaira/data/human/liver/human_liver.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_liver_2018_10x_macparland_001 import Dataset as Dataset0001 -from .human_liver_2019_10x_popescu_001 import Dataset as Dataset0002 -from .human_liver_2019_10x_ramachandran_001 
import Dataset as Dataset0003 -from .human_liver_2019_mCELSeq2_aizarani_001 import Dataset as Dataset0004 -from .human_liver_2020_microwell_han_001 import Dataset as Dataset0005 -from .human_liver_2020_microwell_han_002 import Dataset as Dataset0006 -from .human_liver_2020_microwell_han_003 import Dataset as Dataset0007 -from .human_liver_2020_microwell_han_004 import Dataset as Dataset0008 -from .human_liver_2020_microwell_han_005 import Dataset as Dataset0009 - - -class DatasetGroupLiver(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py deleted file mode 100644 index 5828651a0..000000000 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - The input files for this dataloader (GSE115469.csv.gz and GSE115469_labels.txt) were kindly provided to us by the - authors of the publication. 
Please contact them directly to obtain the required - files. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE115469" - self.download_website_meta = 'private' - self.organ = "liver" - self.sub_tissue = "caudate lobe" - self.annotated = True - - self.class_maps = { - "0": { - '1':'Hepatocyte 1', - '2':'Alpha beta T cells', - '3':'Hepatocyte 2', - '4':'Inflammatory macrophages', - '5':'Hepatocyte 3', - '6':'Hepatocyte 4', - '7':'Plasma cells', - '8':'NK cell', - '9':'Gamma delta T cells 1', - '10':'Non inflammatory macrophages', - '11':'Periportal LSECs', - '12':'Central venous LSECs', - '13':'Endothelial cell', - '14':'Hepatocyte 5', - '15':'Hepatocyte 6', - '16':'Mature B cells', - '17':'Cholangiocytes', - '18':'Gamma delta T cells 2', - '19':'Erythroid cells', - '20':'Hepatic stellate cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"), - os.path.join(self.path, "human", "liver", "GSE115469_labels.txt") - ] - self.adata = anndata.read_csv(fn[0]).T - celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') - self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'McGilvray' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-018-06318-7' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
'10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py deleted file mode 100644 index d335ed46d..000000000 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - The input file for this dataloader (fetal_liver_alladata_.h5ad) was kindly provided to us by the - authors of the publication. Please contact them directly to obtain the required file. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" - self.download_website = "https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-7407/" - self.download_website_meta = 'private' - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'Mature B cells', - 'DC1': 'Dendritic cell 1', - 'DC2': 'Dendritic cell 2', - 'DC precursor': 'Dendritic cell precursor', - 'Early Erythroid': 'Early Erythroid', - 'Early lymphoid_T lymphocyte': 'Early lymphoid T lymphocyte', - 'Endothelial cell': 'Endothelial cell', - 'Fibroblast': 'Fibroblast', - 'HSC_MPP': 'HSC MPP', - 'Hepatocyte': 'Hepatocyte', - 'ILC precursor': 'ILC precursor', - 'Kupffer Cell': 'Kupffer Cell', - 'Late Erythroid': 'Late Erythroid', - 'MEMP': 'MEMP', - 'Mast cell': 'Mast cell', - 'Megakaryocyte': 'Megakaryocyte', - 'Mid Erythroid': 'Mid Erythroid', - 'Mono-Mac': 'Mono Macrophage', - 'Monocyte': 'Monocyte', - 'Monocyte precursor': 'Monocyte precursor', - 'NK': 'NK cell', - 'Neutrophil-myeloid progenitor': 'Neutrophil myeloid progenitor', - 'Pre pro B cell': 'Pre pro B cell', - 'VCAM1+ EI macrophage': 'VCAM1pos EI macrophage', - 'pDC precursor': 'pDendritic cell precursor', - 'pre-B cell': 'pre B cell', - 'pro-B cell': 'pro B cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Haniffa' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 
- self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1652-y' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py deleted file mode 100644 index 0e9623afa..000000000 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ /dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This dataloader requires manual preprocessing of the Rdata file that can be obtained from the link in the - `download_website` attribute of this class. 
The preprocessing code below uses the rpy2 and anndata2ri python - packages to convert the R object to anndata (pip install anndata2ri), run it in a jupyter notebook: - - ## Notebook Cell 1 - import anndata2ri - anndata2ri.activate() - %load_ext rpy2.ipython - - ## Notebook Cell 2 - %%R -o sce - library(Seurat) - load('tissue.rdata') - new_obj = CreateSeuratObject(counts = tissue@raw.data) - new_obj@meta.data = tissue@meta.data - sce <- as.SingleCellExperiment(new_obj) - - ## Notebook cell 3 - sce.write('ramachandran.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" - self.download_website = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" - self.download_website_meta = None - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'MPs': 'MP', - 'Tcells': 'Tcells', - 'ILCs': 'ILC', - 'Endothelia': 'Endothelia', - 'Bcells': 'Bcells', - 'pDCs': 'pDCs', - 'Plasma Bcells': 'Plasma B cell', - 'Mast cells': 'Mast cell', - 'Mesenchyme': 'Mesenchyme', - 'Cholangiocytes': 'Cholangiocytes', - 'Hepatocytes': 'Hepatocytes', - 'Mesothelia': 'Mesothelia', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Henderson' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1631-3' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] 
= '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py deleted file mode 100644 index 19e9e2783..000000000 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ /dev/null @@ -1,108 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - '1': 'NK, NKT and T cells', - '2': 'Kupffer Cell', - '3': 'NK, NKT and T cells', - '4': 'Cholangiocytes', - '5': 'NK, NKT and T cells', - '6': 'Kupffer Cell', - '7': 'Cholangiocytes', - '8': 'B Cell', - '9': 'Liver sinusoidal endothelial cells', - '10': 'Macrovascular endothelial cells', - '11': 'Hepatocyte', - '12': 'NK, NKT and T cells', - '13': 'Liver sinusoidal endothelial cells', - '14': 'Hepatocyte', - '15': 'Other endothelial cells', - '16': 'Unknown', - '17': 'Hepatocyte', - '18': 'NK, NKT and T cells', - '19': 'Unknown', - '20': 'Liver sinusoidal endothelial cells', - '21': 'Macrovascular endothelial cells', - '22': 'B Cell', - '23': 'Kupffer Cell', - '24': 'Cholangiocytes', - '25': 'Kupffer Cell', - '26': 'Other endothelial cells', - '27': 'Unknown', - '28': 'NK, NKT and T cells', - '29': 'Macrovascular endothelial cells', - '30': 'Hepatocyte', - '31': 'Kupffer Cell', - '32': 'Liver sinusoidal endothelial cells', - '33': 'Hepatic stellate cells', - '34': 'B Cell', - '35': 'Other endothelial cells', - '36': 'Unknown', - '37': 'Unknown', - '38': 'B Cell', - '39': 'Cholangiocytes' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) - celltype_df = pd.read_csv(fn[1], sep=' ') - self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() - self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Gruen' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1373-2' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py deleted file mode 100644 index ed46c5da4..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape 
dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 
'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py deleted file mode 100644 index ba74db0ba..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py deleted file mode 100644 index 9ecdc5456..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py deleted file mode 100644 index 6f8003ab9..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'FetalLiver' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_Liver_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py deleted file mode 100644 index a4909b27f..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'FetalLiver' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_Liver_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/__init__.py b/sfaira/data/human/lung/__init__.py deleted file mode 100644 index fafe9671b..000000000 --- a/sfaira/data/human/lung/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_lung import DatasetGroupLung diff --git a/sfaira/data/human/lung/external.py b/sfaira/data/human/lung/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/lung/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/lung/human_lung.py b/sfaira/data/human/lung/human_lung.py deleted file mode 100644 index fc29e35a3..000000000 --- a/sfaira/data/human/lung/human_lung.py +++ 
/dev/null @@ -1,53 +0,0 @@ -from typing import Union - -from .external import DatasetGroupBase - -from .human_lung_2019_10x_braga_001 import Dataset as Dataset0001 -from .human_lung_2019_10x_braga_002 import Dataset as Dataset0002 -from .human_lung_2019_10x_madissoon_001 import Dataset as Dataset0003 -from .human_lung_2019_dropseq_braga_003 import Dataset as Dataset0004 -from .human_lung_2020_10x_habermann_001 import Dataset as Dataset0005 -from .human_lung_2020_10x_lukassen_001 import Dataset as Dataset0006 -from .human_lung_2020_10x_lukassen_002 import Dataset as Dataset0007 -from .human_lung_2020_10x_miller_001 import Dataset as Dataset0008 -from .human_lung_2020_10x_travaglini_001 import Dataset as Dataset0009 -from .human_lung_2020_microwell_han_001 import Dataset as Dataset0010 -from .human_lung_2020_microwell_han_002 import Dataset as Dataset0011 -from .human_lung_2020_microwell_han_003 import Dataset as Dataset0012 -from .human_lung_2020_microwell_han_004 import Dataset as Dataset0013 -from .human_lung_2020_microwell_han_005 import Dataset as Dataset0014 -from .human_lung_2020_smartseq2_travaglini_002 import Dataset as Dataset0015 - - -class DatasetGroupLung(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path), - Dataset0011(path=path, meta_path=meta_path), - Dataset0012(path=path, meta_path=meta_path), - Dataset0013(path=path, meta_path=meta_path), - Dataset0014(path=path, meta_path=meta_path), - 
Dataset0015(path=path, meta_path=meta_path), - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py deleted file mode 100644 index a9119aaa8..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ /dev/null @@ -1,84 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" - self.download_website = "https://covid19.cog.sanger.ac.uk/" \ - "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "alveoli, parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated 2': 'Multiciliated lineage', - 'Luminal_Macrophages': 'Macrophages', - 'Basal 1': 'Basal', - 'Dendritic cells': 'Dendritic cells', - 'Endothelial': '1_Endothelial', - 'Lymphatic': 'Lymphatic EC', - 'Ciliated 1': 'Multiciliated lineage', - 'Smooth muscle': '2_Smooth Muscle', - 'Type_1_alveolar': 'AT1', - 'Neutrophils': 'Monocytes', - 'Club': 'Secretory', - 'Basal 2': 'Basal', - 'B cells': 'B cell lineage', - 'T and NK': '2_Lymphoid', - 'Mesothelium': 
'Mesothelium', - 'Mast cells': 'Mast cells', - 'Fibroblasts': '2_Fibroblast lineage', - 'Type 2 alveolar': 'AT2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py deleted file mode 100644 index 2ce4619c1..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ /dev/null @@ -1,84 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly 
processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" - self.download_website = "https://covid19.cog.sanger.ac.uk/" \ - "vieira19_Bronchi_anonymised.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "bronchi" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated 1': 'Multiciliated lineage', - 'Club': 'Secretory', - 'Ciliated 2': 'Multiciliated lineage', - 'Ionocytes': 'Rare', - 'Basal 2': 'Basal', - 'Goblet_1': 'Secretory', - 'Goblet 2': 'Secretory', - 'Basal 1': 'Basal', - 'Dendritic cells': 'Dendritic cells', - 'B cells': 'B cell lineage', - 'Luminal_Macrophages': 'Macrophages', - 'Neutrophils': 'Monocytes', - 'Endothelial': '1_Endothelial', - 'Smooth muscle': '2_Smooth Muscle', - 'T and NK': '2_Lymphoid', - 'Fibroblasts': '2_Fibroblast lineage', - 'Lymphatic': 'Lymphatic EC', - 'Mast cells': 'Mast cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py deleted file mode 100644 index 4327033ee..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" - self.download_website = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'T_CD4': 'T cell lineage', - 'Mast_cells': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Blood_vessel': '2_Blood vessels', - 'Ciliated': 'Multiciliated lineage', - 'Macrophage_MARCOneg': 'Macrophages', - 'DC_plasmacytoid': 'Dendritic cells', - 'DC_1': 'Dendritic cells', - 'Muscle_cells': '2_Smooth Muscle', - 'Macrophage_MARCOpos': 'Macrophages', - 'T_cells_Dividing': 'T cell lineage', - 'DC_Monocyte_Dividing': 'Dendritic cells', - 'B_cells': 'B cell lineage', - 'T_CD8_CytT': 'T cell lineage', - 'NK_Dividing': 'Innate lymphoid cells', - 'T_regulatory': 'T cell lineage', - 'DC_2': 'Dendritic cells', - 'Alveolar_Type2': 'AT2', - 'Plasma_cells': 'B cell lineage', - 'NK': 'Innate lymphoid cells', - 'Alveolar_Type1': 'AT1', - 'Fibroblast': '2_Fibroblast lineage', - 'DC_activated': 'Dendritic cells', - 'Macrophage_Dividing': 'Macrophages', - 'Lymph_vessel': 'Lymphatic EC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Meyer' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1186/s13059-019-1906-x" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734') diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py deleted file mode 100644 index 6fadd1a90..000000000 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the two raw data files which can be obtained from the `download_website` - and `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" - self.organ = "lung" - self.sub_tissue = "parenchymal lung and distal airway specimens" - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblasts', - 'Type 2': 'AT2', - 'B cell': 'B cell lineage', - 'Macrophages': 'Macrophages', - 'NK cell': 'Innate lymphoid cells', - 'T cell': 'T cell lineage', - 'Ciliated': 'Multiciliated lineage', - 'Lymphatic': 'Lymphatic EC', - 'Type 1': 'AT1', - 'Transformed epithelium': '1_Epithelial', - 'Secretory': 'Secretory', - 'Endothelium': '1_Endothelial', - 'Mast cell': 'Mast cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), - os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), - ] - self.adata = anndata.read_csv(fn[0]).T - self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'dropseq' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = [self.download_website, self.download_website_meta] - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'uninvolved areas of tumour resection material' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py deleted file mode 100644 index 5f5872577..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ /dev/null @@ -1,126 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data files if `load_raw=True` is passed to self.load() - To download the datafiles required by this dataloader, use the links provided as the `download_website` and - `download_website_meta` attribute of this class. For (up to 100-fold faster) repeated data loading, please pass - `load_raw=False` when calling the self.load() method. 
For this, you need to preprocess the raw files as below and - place the resulting h5ad file in the data folder of this organ: - - import anndata - import pandas as pd - adata = anndata.read_mtx('GSE135893_matrix.mtx.gz').T - adata.var = pd.read_csv('GSE135893_genes.tsv.gz', index_col=0, header=None, names=['ids']) - adata.obs = pd.read_csv('GSE135893_barcodes.tsv.gz', index_col=0, header=None, names=['barcodes']) - obs = pd.read_csv('GSE135893_IPF_metadata.csv.gz', index_col=0) - adata = adata[obs.index.tolist(),:].copy() - adata.obs = obs - adata.write('habermann_processed.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" - self.download_website = [ - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fbarcodes%2Etsv%2Egz" - ] - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" - self.organ = "lung" - self.sub_tissue = "parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'Proliferating Macrophages': 'Macrophages', - 'Myofibroblasts': 'Myofibroblasts', - 'Proliferating Epithelial Cells': 'Proliferating Epithelial Cells', - 'Mesothelial Cells': 'Mesothelium', - 'cDCs': 'Dendritic cells', - 'Mast Cells': 'Mast cells', - 'Ciliated': 'Multiciliated lineage', - 'T Cells': 'T cell lineage', - 'pDCs': 'Dendritic cells', - 'Smooth Muscle Cells': '2_Smooth Muscle', - 'Transitional AT2': 'AT2', - 'AT2': 'AT2', - 'B Cells': 'B cell lineage', - 'NK Cells': 
'Innate lymphoid cells', - 'Monocytes': 'Monocytes', - 'Basal': 'Basal', - 'Plasma Cells': 'B cell lineage', - 'Differentiating Ciliated': 'Multiciliated lineage', - 'Macrophages': 'Macrophages', - 'MUC5B+': 'Secretory', - 'SCGB3A2+': 'Secretory', - 'Fibroblasts': 'Fibroblasts', - 'Lymphatic Endothelial Cells': 'Lymphatic EC', - 'Endothelial Cells': '2_Blood vessels', - 'SCGB3A2+ SCGB1A1+': 'Secretory', - 'PLIN2+ Fibroblasts': 'Fibroblasts', - 'KRT5-/KRT17+': 'KRT5-/KRT17+', - 'MUC5AC+ High': 'Secretory', - 'Proliferating T Cells': 'T cell lineage', - 'AT1': 'AT1', - 'HAS1 High Fibroblasts': 'Fibroblasts' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), - ] - self.adata = anndata.read_mtx(fn[0]).T - self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=['ids']) - self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=['barcodes']) - obs = pd.read_csv(fn[3], index_col=0) - self.adata = self.adata[obs.index.tolist(), :].copy() - self.adata.obs = obs - else: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "habermann_processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Kropski' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/753806" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Control' for i in self.adata.obs['Status']] - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Diagnosis'].astype('category') - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py deleted file mode 100644 index 8ad57e976..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated': 'Multiciliated lineage', - 'Endothelial': '1_Endothelial', - 'AT2': 'AT2', - 'LymphaticEndothelium': 'Lymphatic EC', - 'Fibroblasts': '2_Fibroblast lineage', - 'Club': 'Secretory', - 'Immuno_TCells': 'T cell lineage', - 'Immuno_Monocytes': 'Monocytes', - 'AT1': 'AT1' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated 
- self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py deleted file mode 100644 index 7ecfbaf49..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "bronchial epithelial cells" - self.annotated = True - - self.class_maps = { - "0": { - 'Secretory3': 'Secretory', - 'Ciliated1': 'Multiciliated lineage', - 'Goblet': 'Secretory', - 'Ciliated2': 'Multiciliated lineage', - 'Club': 'Secretory', - 'Secretory2': 'Secretory', - 'FOXN4': 'Rare', - 'Basal1': 'Basal', - 'Secretory1': 'Secretory', - 'Fibroblast': '2_Fibroblast lineage', - 'Ionocyte': 'Rare', - 'Basal3': 'Basal', - 'Basal_Mitotic': 'Basal', - 'Basal2': 'Basal', - }, - } 
- - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py deleted file mode 100644 index 955e85b9e..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader 
directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" - self.download_website = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "fetal lung" - self.annotated = True - - self.class_maps = { - "0": { - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Basal cell': 'Basal', - 'Bud tip adjacent': 'Fetal airway progenitors', - 'Bud tip progenitor': 'Fetal airway progenitors', - 'Cartilage': 'Cartilage', - 'Club-like secretory': 'Secretory', - 'Endothelial': '1_Endothelial', - 'Epithelial': '1_Epithelial', - 'Goblet-like secretory': 'Secretory', - 'Hematopoietic, B Cells': 'B cell lineage', - 'Hematopoietic, Macrophage': 'Macrophages', - 'Hematopoietic, Natural Killer Cell': 'Innate lymphoid cells', - 'Hematopoietic, T Cells': 'T cell lineage', - 'Immune': '1_Immune', - 'Intermediate ciliated': 'Multiciliated lineage', - 'Mesenchyme RSPO2+': '1_Stroma', - 'Mesenchyme SERPINF1-high': '1_Stroma', - 'Multiciliated cell': 'Multiciliated lineage', - 'Multiciliated precursor': 'Multiciliated lineage', - 'Neuroendocrine': 'Rare', - 'Pericyte': 'Fibroblasts', - 'RBC': 'Erythrocytes', - 'Secretory progenitor': 'Secretory', - 'Submucosal gland': 'Submucosal Secretory', - 'Submucosal gland basal': 'Submucosal Secretory', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", 
"miller20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Spence' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.devcel.2020.01.033" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py deleted file mode 100644 index 630f99090..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ /dev/null @@ -1,135 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the data file provided by the authors. To obtain the file, you need to create a - free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login('synapse_username','password') - syn21625095 = syn.get(entity='syn21625095') - shutil.move(syn21625095.path, 'droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" - self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "proximal, medial, distal, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'Intermediate Monocyte': 'Monocytes', - 'Adventitial Fibroblast': 'Fibroblasts', - 'Myeloid Dendritic Type 1': 'Dendritic cells', - 'Myofibroblast': 'Myofibroblasts', - 'Bronchial Vessel 2': 'Bronchial Vessel 2', - 'Fibromyocyte': 'Fibromyocyte', - 'Basal': 'Basal', - 'IGSF21+ Dendritic': 'Macrophages', - 'CD8+ Memory/Effector T': 'T cell lineage', - 'CD4+ Naive T': 'T cell lineage', - 'Myeloid Dendritic Type 2': 'Dendritic cells', - 'Neuroendocrine': 'Rare', - 'Ciliated': 'Multiciliated lineage', - 'Proximal Ciliated': 'Multiciliated lineage', - 'Proliferating Basal': 'Basal', - 'Proximal Basal': 'Basal', - 'Nonclassical Monocyte': 'Monocytes', - 'Proliferating Macrophage': 'Macrophages', - 'Plasmacytoid Dendritic': 'Dendritic cells', - 'Vein': 'Venous', - 'Basophil/Mast 1': 'Mast cells', - 'Serous': 'Submucosal Secretory', - 'Natural Killer T': 'T cell lineage', - 'Mesothelial': 'Mesothelium', - 'Ionocyte': 'Rare', - 'Bronchial Vessel 1': 'Bronchial Vessel 1', - 'Natural Killer': 'Innate 
lymphoid cells', - 'Capillary Aerocyte': 'Capillary', - 'Vascular Smooth Muscle': '2_Smooth Muscle', - 'Macrophage': 'Macrophages', - 'Basophil/Mast 2': 'Mast cells', - 'Platelet/Megakaryocyte': 'Megakaryocytes', - 'Pericyte': 'Fibroblasts', - 'Capillary Intermediate 2': 'Capillary Intermediate 2', - 'CD4+ Memory/Effector T': 'T cell lineage', - 'B': 'B cell lineage', - 'Lymphatic': 'Lymphatic EC', - 'Mucous': 'Submucosal Secretory', - 'Signaling Alveolar Epithelial Type 2': 'AT2', - 'Alveolar Epithelial Type 1': 'AT1', - 'OLR1+ Classical Monocyte': 'Monocytes', - 'Plasma': 'B cell lineage', - 'Lipofibroblast': 'Fibroblasts', - 'Capillary Intermediate 1': 'Capillary Intermediate 1', - 'EREG+ Dendritic': 'Macrophages', - 'Capillary': 'Capillary', - 'TREM2+ Dendritic': 'Macrophages', - 'Alveolar Fibroblast': 'Fibroblasts', - 'Classical Monocyte': 'Monocytes', - 'Goblet': 'Secretory', - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Club': 'Secretory', - 'Proliferating NK/T': 'Innate lymphoid cells', - 'Alveolar Epithelial Type 2': 'AT2', - 'Differentiating Basal': 'Basal', - 'CD8+ Naive T': 'T cell lineage', - 'Artery': 'Arterial' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ - .multiply(1 / 10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py deleted file mode 100644 index 99e467b2e..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'FetalLung' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated 
cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py deleted file mode 100644 index 1da535072..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': 
'1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py deleted file mode 100644 index 9e1566a2f..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 
'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py deleted file mode 100644 index af7875309..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated 
cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py deleted file mode 100644 index 8233627f0..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'FetalLung' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': 
'1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py deleted file mode 100644 index d347e5766..000000000 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ /dev/null @@ -1,122 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the data file provided by the authors. To obtain the file, you need to create a - free account at https://www.synapse.org. You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login('synapse_username','password') - syn21625142 = syn.get(entity='syn21625142') - shutil.move(syn21625142.path, 'facs_normal_lung_blood_scanpy.20200205.RC4.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" - self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "proximal, medial, distal, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'Intermediate 
Monocyte': 'Monocytes', - 'Adventitial Fibroblast': 'Fibroblasts', - 'Myofibroblast': 'Myofibroblasts', - 'Fibromyocyte': 'Fibromyocyte', - 'Basal': 'Basal', - 'IGSF21+ Dendritic': 'Macrophages', - 'CD8+ Memory/Effector T': 'T cell lineage', - 'CD4+ Naive T': 'T cell lineage', - 'Myeloid Dendritic Type 2': 'Dendritic cells', - 'Neuroendocrine': 'Rare', - 'Ciliated': 'Multiciliated lineage', - 'Nonclassical Monocyte': 'Monocytes', - 'Plasmacytoid Dendritic': 'Dendritic cells', - 'Vein': 'Venous', - 'Basophil/Mast 1': 'Mast cells', - 'Natural Killer T': 'T cell lineage', - 'Ionocyte': 'Rare', - 'Bronchial Vessel 1': 'Bronchial Vessel 1', - 'Natural Killer': 'Innate lymphoid cells', - 'Capillary Aerocyte': 'Capillary', - 'Vascular Smooth Muscle': '2_Smooth Muscle', - 'Macrophage': 'Macrophages', - 'Pericyte': 'Fibroblasts', - 'CD4+ Memory/Effector T': 'T cell lineage', - 'B': 'B cell lineage', - 'Lymphatic': 'Lymphatic EC', - 'Signaling Alveolar Epithelial Type 2': 'AT2', - 'Alveolar Epithelial Type 1': 'AT1', - 'Plasma': 'B cell lineage', - 'Lipofibroblast': 'Fibroblasts', - 'Capillary Intermediate 1': 'Capillary Intermediate 1', - 'Capillary': 'Capillary', - 'Alveolar Fibroblast': 'Fibroblasts', - 'Classical Monocyte': 'Monocytes', - 'Goblet': 'Secretory', - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Club': 'Secretory', - 'Proliferating NK/T': 'Innate lymphoid cells', - 'Alveolar Epithelial Type 2': 'AT2', - 'Differentiating Basal': 'Basal', - 'CD8+ Naive T': 'T cell lineage', - 'Artery': 'Arterial', - 'Neutrophil': 'Monocytes', - 'Dendritic': 'Dendritic cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - 
self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ - .multiply(1 / 1000000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/__init__.py b/sfaira/data/human/malegonad/__init__.py deleted file mode 100644 index bf7a87036..000000000 --- a/sfaira/data/human/malegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_malegonad import DatasetGroupMalegonad diff --git a/sfaira/data/human/malegonad/external.py b/sfaira/data/human/malegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/malegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import 
ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/malegonad/human_malegonad.py b/sfaira/data/human/malegonad/human_malegonad.py deleted file mode 100644 index 681f1f334..000000000 --- a/sfaira/data/human/malegonad/human_malegonad.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_malegonad_2018_10x_guo_001 import Dataset as Dataset0001 -from .human_malegonad_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_malegonad_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupMalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py deleted file mode 100644 index 0b4b7ab2f..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2" - self.download_website = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" - self.download_website_meta = None - self.organ = "malegonad" - self.sub_tissue = "testis" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongated Spermatids': 'Elongated Spermatids', - 'Leydig cells': 'Leydig cells', - 'Early Primary Spermatocytes': 'Early Primary Spermatocytes', - 'Round Spermatids': 'Round Spermatids', - 'Endothelial cells': 'Endothelial cells', - 'Macrophages': 'Macrophages', - 'Myoid cells': 'Myoid cells', - 'Differentiating Spermatogonia': 'Differentiating Spermatogonia', - 'Late primary Spermatocytes': 'Late primary Spermatocytes', - 'Spermatogonial Stem cell': 'Spermatogonial Stem cell', - 'Sertoli cells': 'Sertoli cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Cairns" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41422-018-0099-2" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py deleted file mode 100644 index 045e0ba9c..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ /dev/null @@ -1,90 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'malegonad' - self.sub_tissue = 'FetalMaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal acinar cell': 'Fetal acinar cell', - 'Fetal chondrocyte': 'Fetal chondrocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Sertoli cells', - 'Loop of Henle': 'Loop of Henle', - 'Macrophage': 'Macrophages', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal 
cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py deleted file mode 100644 index 3df75da7f..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,90 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'malegonad' - self.sub_tissue = 'FetalMaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal acinar cell': 'Fetal acinar cell', - 'Fetal chondrocyte': 'Fetal chondrocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Sertoli cells', - 'Loop of Henle': 'Loop of Henle', - 'Macrophage': 'Macrophages', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal 
cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/mixed/__init__.py b/sfaira/data/human/mixed/__init__.py deleted file mode 100644 index 5c885d57c..000000000 --- a/sfaira/data/human/mixed/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_mixed import DatasetGroupMixed diff --git a/sfaira/data/human/mixed/external.py b/sfaira/data/human/mixed/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/mixed/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/mixed/human_mixed.py b/sfaira/data/human/mixed/human_mixed.py deleted file mode 100644 index 9041ae787..000000000 --- a/sfaira/data/human/mixed/human_mixed.py 
+++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_mixed_2019_10x_szabo_001 import Dataset as Dataset0001 - - -class DatasetGroupMixed(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMixed - self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py deleted file mode 100644 index dde7c0605..000000000 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ /dev/null @@ -1,189 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import tarfile -import pandas as pd -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data files if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. The required celltype annotations for the data were kindly provided to us by the authors of the paper. - Please contact them directly to pbtain the required annotation files (donor1.annotation.txt and - donor2.annotation.txt). For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling - the self.load() method. 
For this, you need to preprocess the raw files as below and place the resulting h5ad file in - the data folder of this organ: - - import anndata - import tarfile - import pandas as pd - import scipy.sparse - adatas = [] - with tarfile.open('GSE126030_RAW.tar') as tar: - for member in tar.getmembers(): - df = pd.read_csv(tar.extractfile(member.name), compression='gzip', sep='\t') - df.index = [i.split('.')[0] for i in df['Accession']] - var = pd.concat([df.pop(x) for x in ['Gene', 'Accession']], 1) - if df.columns[-1].startswith('Un'): - df.drop(df.columns[len(df.columns)-1], axis=1, inplace=True) - adata = anndata.AnnData(df.T) - adata.var = var - if "PP001" in member.name or "PP002" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Lung' - elif "PP003" in member.name or "PP004" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Bone Marrow' - elif "PP005" in member.name or "PP006" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Lymph Node' - elif "PP009" in member.name or "PP010" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Lung' - elif "PP011" in member.name or "PP012" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Bone Marrow' - elif "PP013" in member.name or "PP014" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Lymph Node' - else: - continue - adata.obs.index = member.name.split('_')[1].split('s')[0]+'nskept.'+adata.obs.index - adatas.append(adata) - adata = adatas[0].concatenate(adatas[1:], index_unique=None) - adata.obs.drop('batch', axis=1, inplace=True) - adata = adata[:,adata.X.sum(axis=0) > 0].copy() - adata.obs['cell_ontology_class'] = 'Unknown' - df1 = pd.read_csv('donor1.annotation.txt', sep='\t', index_col=0, header=None) - df2 = pd.read_csv('donor2.annotation.txt', sep='\t', index_col=0, header=None) - for i in df1.index: - adata.obs['cell_ontology_class'].loc[i] = df1.loc[i][1] - for i in df2.index: - 
adata.obs['cell_ontology_class'].loc[i] = df2.loc[i][1] - adata.X = scipy.sparse.csc_matrix(adata.X) - adata.write('GSE126030.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" - self.download_website_meta = 'private' - self.organ = "mixed" - self.sub_tissue = "Bone Marrow, Lung, Lymph Node" - self.annotated = True - self.loaded = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), - os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), - os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"), - ] - adatas = [] - with tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - df = pd.read_csv(tar.extractfile(member.name), compression='gzip', sep='\t') - df.index = [i.split('.')[0] for i in df['Accession']] - var = pd.concat([df.pop(x) for x in ['Gene', 'Accession']], 1) - if df.columns[-1].startswith('Un'): - df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - self.adata = anndata.AnnData(df.T) - self.adata.var = var - if "PP001" in member.name or "PP002" in member.name: - self.adata.obs['donor'] = 'Donor1' - self.adata.obs['organ'] = 'Lung' - elif "PP003" in member.name or "PP004" in member.name: - self.adata.obs['donor'] = 'Donor1' - self.adata.obs['organ'] = 'Bone Marrow' - elif "PP005" in member.name or "PP006" in member.name: - self.adata.obs['donor'] = 'Donor1' - 
self.adata.obs['organ'] = 'Lymph Node' - elif "PP009" in member.name or "PP010" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Lung' - elif "PP011" in member.name or "PP012" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Bone Marrow' - elif "PP013" in member.name or "PP014" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Lymph Node' - else: - continue - self.adata.obs.index = member.name.split('_')[1].split('s')[0] + 'nskept.' + self.adata.obs.index - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:], index_unique=None) - self.adata.obs.drop('batch', axis=1, inplace=True) - self.adata = self.adata[:, self.adata.X.sum(axis=0) > 0].copy() - self.adata.obs['cell_ontology_class'] = 'Unknown' - df1 = pd.read_csv(fn[1], sep='\t', index_col=0, header=None) - df2 = pd.read_csv(fn[2], sep='\t', index_col=0, header=None) - for i in df1.index: - self.adata.obs['cell_ontology_class'].loc[i] = df1.loc[i][1] - for i in df2.index: - self.adata.obs['cell_ontology_class'].loc[i] = df2.loc[i][1] - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - else: - if fn is None: - fn = os.path.join(self.path, "human", "mixed", "GSE126030.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sims" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41467-019-12464-3" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - 
self.adata.obs["subtissue"] = self.adata.obs["organ"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession') - - # If the subset_organs() method has been run before, subset to specified organs - if "organsubset" in self.__dict__: - self.adata = self.adata[self.adata.obs['organ'].isin(self.organsubset)] - # If adata object is empty, set it to None - if not len(self.adata): - self.adata = None - self.loaded = True - - @property - def ncells(self): - if "organsubset" in self.__dict__: - if not self.loaded: - self._load() - if self.adata is None: - return 0 - else: - return self.adata.n_obs - else: - return super().ncells diff --git a/sfaira/data/human/muscle/__init__.py b/sfaira/data/human/muscle/__init__.py deleted file mode 100644 index f6c2f1d41..000000000 --- a/sfaira/data/human/muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_muscle import DatasetGroupMuscle diff --git a/sfaira/data/human/muscle/external.py b/sfaira/data/human/muscle/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/muscle/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/muscle/human_muscle.py b/sfaira/data/human/muscle/human_muscle.py deleted file mode 100644 index 30cdb789b..000000000 --- a/sfaira/data/human/muscle/human_muscle.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_muscle_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_muscle_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupMuscle(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, 
meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py deleted file mode 100644 index c8da3462a..000000000 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'muscle' - self.sub_tissue = 'FetalMuscle' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "muscle", "hcl_FetalMuscle_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py deleted file mode 100644 index 032d37ce1..000000000 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'muscle' - self.sub_tissue = 'AdultMuscle' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "muscle", "hcl_AdultMuscle_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/omentum/__init__.py b/sfaira/data/human/omentum/__init__.py deleted file mode 100644 index 330530786..000000000 --- a/sfaira/data/human/omentum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_omentum import DatasetGroupOmentum diff --git a/sfaira/data/human/omentum/external.py b/sfaira/data/human/omentum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/omentum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/omentum/human_omentum.py b/sfaira/data/human/omentum/human_omentum.py deleted file mode 100644 index f16e94458..000000000 --- a/sfaira/data/human/omentum/human_omentum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_omentum_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_omentum_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_omentum_2020_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupOmentum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupOmentum - self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py deleted file mode 100644 index fa911836d..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ 
/dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py deleted file mode 100644 index 3fb5c9d11..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py deleted file mode 100644 index 7cda691f9..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/pancreas/__init__.py b/sfaira/data/human/pancreas/__init__.py deleted file mode 100644 index 34f7a3229..000000000 --- a/sfaira/data/human/pancreas/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_pancreas import DatasetGroupPancreas diff --git a/sfaira/data/human/pancreas/external.py b/sfaira/data/human/pancreas/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/pancreas/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pancreas/human_pancreas.py b/sfaira/data/human/pancreas/human_pancreas.py deleted file mode 100644 index e609c44fe..000000000 --- a/sfaira/data/human/pancreas/human_pancreas.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_pancreas_2016_indrop_baron_001 import Dataset as Dataset0001 -from .human_pancreas_2016_smartseq2_segerstolpe_001 import Dataset as Dataset0002 -from .human_pancreas_2017_smartseq2_enge_001 import Dataset as Dataset0003 -from .human_pancreas_2020_microwell_han_001 import Dataset as Dataset0004 -from .human_pancreas_2020_microwell_han_002 import Dataset as Dataset0005 -from .human_pancreas_2020_microwell_han_003 import Dataset as Dataset0006 -from .human_pancreas_2020_microwell_han_004 import Dataset as Dataset0007 - - -class DatasetGroupPancreas(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, 
datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py deleted file mode 100644 index 7afe0094b..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" - self.download_website = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" - self.download_website_meta = None - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 't_cell': 'T cell', - 'quiescent_stellate': 'Quiescent Stellate cell', - 'mast': 'Mast cell', - 'delta': 'Delta cell', - 'beta': 'Beta cell', - 'endothelial': 'Endothelial cell', - 'macrophage': 'Macrophage', - 'epsilon': 'Epsilon cell', - 'activated_stellate': 'Activated Stellate cell', - 'acinar': 'Acinar cell', - 'alpha': 'Alpha cell', - 'ductal': 'Ductal cell', - 'schwann': 'Schwann cell', - 'gamma': 'Gamma cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Yanai" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cels.2016.08.011" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'inDrop' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py deleted file mode 100644 index 44d2d183e..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data files which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" - self.download_website = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" - self.download_website_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'alpha cell': 'Alpha cell', - 'ductal cell': 'Ductal cell', - 'beta cell': 'Beta cell', - 'gamma cell': 'Gamma cell', - 'acinar cell': 'Acinar cell', - 'delta cell': 'Delta cell', - 'PSC cell': 'PSC cell', - 'unclassified endocrine cell': 'Unclassified endocrine cell', - 'co-expression cell': 'Co-expression cell', - 'endothelial cell': 'Endothelial cell', - 'epsilon cell': 'Epsilon cell', - 'mast cell': 'Mast cell', - 'MHC class II cell': 'MHC class II cell', - 'unclassified cell': 'Unknown', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") - ] - df = pd.read_csv(fn[0], sep='\t') - df.index = df.index.get_level_values(0) - df = df.drop('#samples', axis=1) - df = df.T.iloc[:, :26178] - self.adata = anndata.AnnData(df) - self.adata.obs = pd.read_csv(fn[1], sep='\t').set_index('Source Name').loc[self.adata.obs.index] - # filter observations which are not cells (empty wells, low quality cells etc.) 
- self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sandberg" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2016.08.020" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py deleted file mode 100644 index 6bdd8965e..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ /dev/null @@ -1,147 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import tarfile -import gzip -from io import StringIO -import anndata as ad -import pandas as pd -import scipy.sparse - - -class 
Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` and - `download_website_meta` attributes of this class. For (up to 100-fold faster) repeated data loading, please pass - `load_raw=False` when calling the self.load() method. For this, you need to preprocess the raw files as below and - place the resulting h5ad file in the data folder of this organ: - - import tarfile - import os - import gzip - from io import StringIO - import anndata as ad - import pandas as pd - import scipy.sparse - dfs = [] - with tarfile.open("GSE81547_RAW.tar") as tar: - for member in tar.getmembers(): - d = pd.read_csv(tar.extractfile(member), compression='gzip', header=None, sep='\t', index_col=0, names=[member.name.split("_")[0]]) - dfs.append(d) - adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - adata.X = scipy.sparse.csc_matrix(adata.X) - with gzip.open('GSE81547_series_matrix.txt.gz') as f: - file_content = [i.decode("utf-8") for i in f.readlines()] - inputstring = '' - for line in file_content: - if '"ID_REF"' in line: - inputstring += line - if '!Sample_title' in line: - inputstring += line[1:] - if '!Sample_characteristics_ch1\t"inferred_cell_type: alpha' in line: - inputstring += line[1:] - data = StringIO(inputstring) - d = pd.read_csv(data, sep='\t').T - d.columns=d.iloc[0] - d.drop('Sample_title', inplace=True) - d = d.reset_index().set_index('ID_REF') - d.columns.name = None - d.index.name = None - adata.obs['celltype'] = [d.loc[i]['Sample_characteristics_ch1'].split(": ")[1] for i in adata.obs.index] - adata.obs['patient'] = ["_".join(d.loc[i]['index'].split('_')[:2]) for i in adata.obs.index] - adata.write('GSE81547.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, 
- **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" - self.organ = "pancreas" - self.sub_tissue = "islet of Langerhans" - self.annotated = True - - self.class_maps = { - "0": { - 'alpha': 'Alpha cell', - 'acinar': 'Acinar cell', - 'ductal': 'Ductal cell', - 'beta': 'Beta cell', - 'unsure': 'Unknown', - 'delta': 'Delta cell', - 'mesenchymal': 'Mesenchymal Cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), - os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") - ] - dfs = [] - with tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - d = pd.read_csv(tar.extractfile(member), compression='gzip', header=None, sep='\t', index_col=0, - names=[member.name.split("_")[0]]) - dfs.append(d) - self.adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - with gzip.open(fn[1]) as f: - file_content = [i.decode("utf-8") for i in f.readlines()] - inputstring = '' - for line in file_content: - if '"ID_REF"' in line: - inputstring += line - if '!Sample_title' in line: - inputstring += line[1:] - if '!Sample_characteristics_ch1\t"inferred_cell_type: alpha' in line: - inputstring += line[1:] - data = StringIO(inputstring) - d = pd.read_csv(data, sep='\t').T - d.columns = d.iloc[0] - d.drop('Sample_title', inplace=True) - d = d.reset_index().set_index('ID_REF') - d.columns.name = None - d.index.name = 
None - self.adata.obs['celltype'] = [d.loc[i]['Sample_characteristics_ch1'].split(": ")[1] for i in self.adata.obs.index] - self.adata.obs['patient'] = ["_".join(d.loc[i]['index'].split('_')[:2]) for i in self.adata.obs.index] - - else: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "GSE81547.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2017.09.004" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py deleted file mode 100644 index bcc01b053..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'AdultPancreas' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 
'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_AdultPancreas_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py deleted file mode 100644 index 22ff8326d..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata 
-import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 
'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py deleted file mode 100644 index 7bd2e3004..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 
'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 
'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py deleted file mode 100644 index c218c072c..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 
'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/placenta/__init__.py b/sfaira/data/human/placenta/__init__.py deleted file mode 100644 index 5ea45fa0a..000000000 --- a/sfaira/data/human/placenta/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_placenta import DatasetGroupPlacenta diff --git a/sfaira/data/human/placenta/external.py b/sfaira/data/human/placenta/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/placenta/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/placenta/human_placenta.py b/sfaira/data/human/placenta/human_placenta.py deleted file mode 100644 index 4700e8083..000000000 --- a/sfaira/data/human/placenta/human_placenta.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_placenta_2018_smartseq2_ventotormo_001 import Dataset as Dataset0001 -from .human_placenta_2018_10x_ventotormo_001 import Dataset as Dataset0002 -from .human_placenta_2020_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupPlacenta(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py deleted file mode 100644 index e5c833dac..000000000 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Union -from .external import DatasetBase -import pandas as pd -import anndata - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2018_10x_ventotormo_10.1038/s41586-018-0698-6" - self.download_website = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.1.zip' - self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip' - self.organ = "placenta" - self.sub_tissue = "placenta, decidua, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'DC1': 'Dendritic Cells 1', - 'DC2': 'Dendritic Cells 2', - 'EVT': 'Extravillous Trophoblasts', - 'Endo (f)': 'Endothelial Cells f', - 'Endo (m)': 'Endothelial Cells m', - 'Endo L': 'Endothelial Cells L', - 'Epi1': 'Epithelial Glandular Cells 1', - 'Epi2': 'Epithelial Glandular Cells 2', - 'Granulocytes': 'Granulocytes', - 'HB': 'Hofbauer Cells', - 'ILC3': 'ILC3', - 'MO': 'Monocyte', - 'NK CD16+': 'NK Cells CD16+', - 'NK CD16-': 'NK Cells CD16-', - 'Plasma': 'B cell (Plasmocyte)', - 'SCT': 
'Syncytiotrophoblasts', - 'Tcells': 'T cell', - 'VCT': 'Villous Cytotrophoblasts', - 'dM1': 'Decidual Macrophages 1', - 'dM2': 'Decidual Macrophages 2', - 'dM3': 'Decidual Macrophages 3', - 'dNK p': 'Decidual NK Cells p', - 'dNK1': 'Decidual NK Cells 1', - 'dNK2': 'Decidual NK Cells 2', - 'dNK3': 'Decidual NK Cells 3', - 'dP1': 'Perivascular Cells 1', - 'dP2': 'Perivascular Cells 2', - 'dS1': 'Decidual Stromal Cells 1', - 'dS2': 'Decidual Stromal Cells 2', - 'dS3': 'Decidual Stromal Cells 3', - 'fFB1': 'Fibroblasts 1', - 'fFB2': 'Fibroblasts 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"), - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) - df = pd.read_csv(fn[1], sep='\t') - for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs['annotation'] - self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() - self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - - self.adata = self.adata[:, ~self.adata.var.index.isin( - ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py deleted file mode 100644 index 8de479d29..000000000 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Union -from .external import DatasetBase -import pandas as pd -import anndata - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2018_smartseq2_ventotormo_10.1038/s41586-018-0698-6" - self.download_website = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.1.zip' - self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.2.zip' - self.organ = "placenta" - self.sub_tissue = "placenta, decidua, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'DC1': 'Dendritic Cells 1', - 'DC2': 'Dendritic Cells 2', - 'EVT': 'Extravillous Trophoblasts', - 'Endo (f)': 'Endothelial Cells f', - 'Endo (m)': 'Endothelial Cells m', - 'Endo L': 'Endothelial Cells L', - 'Epi1': 'Epithelial Glandular Cells 1', - 'Epi2': 'Epithelial Glandular Cells 2', - 'Granulocytes': 'Granulocytes', - 'HB': 'Hofbauer Cells', - 'ILC3': 'ILC3', - 'MO': 'Monocyte', - 'NK CD16+': 'NK Cells CD16+', - 'NK CD16-': 'NK Cells CD16-', - 'Plasma': 'B cell (Plasmocyte)', - 'SCT': 'Syncytiotrophoblasts', - 'Tcells': 'T cell', - 'VCT': 'Villous Cytotrophoblasts', - 'dM1': 'Decidual Macrophages 1', - 'dM2': 'Decidual Macrophages 2', - 'dM3': 'Decidual Macrophages 3', - 'dNK p': 'Decidual NK Cells p', - 'dNK1': 'Decidual NK Cells 1', - 'dNK2': 'Decidual NK Cells 2', - 'dNK3': 'Decidual NK Cells 3', - 'dP1': 'Perivascular Cells 1', - 'dP2': 'Perivascular Cells 2', - 'dS1': 'Decidual Stromal Cells 1', - 'dS2': 'Decidual Stromal Cells 2', - 'dS3': 'Decidual Stromal Cells 3', - 'fFB1': 'Fibroblasts 1', - 'fFB2': 'Fibroblasts 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - 
os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"), - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) - df = pd.read_csv(fn[1], sep='\t') - for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "Smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] - self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() - self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - - self.adata = self.adata[:, ~self.adata.var.index.isin( - ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff 
--git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py deleted file mode 100644 index a87c4a0a8..000000000 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Placenta' - self.sub_tissue = 'Placenta' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Macrophage': 'Macrophage', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Stromal cell': 'Stromal cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'T cell': 'T cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Intermediated cell': 'Intermediated cell', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Fetal neuron': 'Fetal neuron', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Endothelial 
cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'hESC': 'hESC', - 'Basal cell': 'Basal cell', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'M2 Macrophage': 'M2 Macrophage', - 'Myeloid cell': 'Myeloid cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "placenta", "hcl_Placenta_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pleura/__init__.py 
b/sfaira/data/human/pleura/__init__.py deleted file mode 100644 index fbfcb922f..000000000 --- a/sfaira/data/human/pleura/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_pleura import DatasetGroupPleura diff --git a/sfaira/data/human/pleura/external.py b/sfaira/data/human/pleura/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/pleura/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pleura/human_pleura.py b/sfaira/data/human/pleura/human_pleura.py deleted file mode 100644 index da4bba12c..000000000 --- a/sfaira/data/human/pleura/human_pleura.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_pleura_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupPleura(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupPleura - self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py deleted file mode 100644 index 80e79ecd1..000000000 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'pleura' - self.sub_tissue = 'AdultPleura' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pleura", "hcl_AdultPleura_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/prostate/__init__.py b/sfaira/data/human/prostate/__init__.py deleted file mode 100644 index 34ef08dc6..000000000 --- a/sfaira/data/human/prostate/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_prostate import DatasetGroupProstate diff --git a/sfaira/data/human/prostate/external.py b/sfaira/data/human/prostate/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/prostate/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/prostate/human_prostate.py b/sfaira/data/human/prostate/human_prostate.py deleted file mode 100644 index 71f2a1991..000000000 --- a/sfaira/data/human/prostate/human_prostate.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_prostate_2018_10x_henry_001 import Dataset as Dataset0001 -from .human_prostate_2020_microwell_han_001 import Dataset as Dataset0002 - - -class DatasetGroupProstate(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py deleted file mode 100644 index a0d2ed360..000000000 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase 
-import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" - self.download_website = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" - self.download_website_meta = None - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True - - self.class_maps = { - "0": { - 'Basal': 'Basal cell', - 'Hillock': 'Hillock', - 'Luminal': 'Luminal', - 'Endothelia': 'Endothelial cell', - 'Club': 'Club', - 'Fibroblast': 'Fibroblast', - 'Smooth muscle': 'Smooth muscle cell', - 'Leukocytes': 'Leukocytes', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Strand" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.celrep.2018.11.086" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py deleted file mode 100644 index ef194dee4..000000000 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'prostate' - self.sub_tissue = 'AdultProstate' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Basal cell': 'Basal cell', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Primordial germ cell': 'Primordial germ cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, 
"human", "prostate", "hcl_AdultProstate_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/rectum/__init__.py b/sfaira/data/human/rectum/__init__.py deleted file mode 100644 index a341faa2e..000000000 --- a/sfaira/data/human/rectum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_rectum import DatasetGroupRectum diff --git a/sfaira/data/human/rectum/external.py b/sfaira/data/human/rectum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/rectum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rectum/human_rectum.py b/sfaira/data/human/rectum/human_rectum.py deleted file mode 100644 index bc246d917..000000000 --- a/sfaira/data/human/rectum/human_rectum.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_rectum_2019_10x_wang_001 import Dataset as Dataset0001 -from .human_rectum_2020_microwell_han_001 import Dataset as Dataset0002 - - -class DatasetGroupRectum(DatasetGroupBase): - - 
def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRectum - self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py deleted file mode 100644 index 8ef1d79b6..000000000 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rectum_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad" - self.organ = "rectum" - self.sub_tissue = "rectum" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Enterocyte progenitor', - 'Goblet': 'Goblet', - 'Enterocyte': 'Enterocyte', - 'Paneth-like': 'Paneth-like', - 'Stem Cell': 'Stem Cell', - 'TA': 'TA', - 'Enteriendocrine': 'Enteroendocrine', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py deleted file mode 100644 index 702e630a7..000000000 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'rectum' - self.sub_tissue = 'AdultRectum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Enterocyte': 'Enterocyte', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 
'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rectum", "hcl_AdultRectum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/rib/__init__.py b/sfaira/data/human/rib/__init__.py deleted file mode 100644 index e648dcc40..000000000 --- a/sfaira/data/human/rib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_rib import DatasetGroupRib diff --git a/sfaira/data/human/rib/external.py b/sfaira/data/human/rib/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/rib/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rib/human_rib.py b/sfaira/data/human/rib/human_rib.py deleted file mode 100644 index 1a5481a0b..000000000 --- 
a/sfaira/data/human/rib/human_rib.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_rib_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_rib_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupRib(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py deleted file mode 100644 index c2d3a7c5b..000000000 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'rib' - self.sub_tissue = 'FetalRib' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py deleted file mode 100644 index a909d5890..000000000 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'rib' - self.sub_tissue = 'FetalRib' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/skin/__init__.py b/sfaira/data/human/skin/__init__.py deleted file mode 100644 index 78ccff527..000000000 --- a/sfaira/data/human/skin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_skin import DatasetGroupSkin diff --git a/sfaira/data/human/skin/external.py b/sfaira/data/human/skin/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/skin/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/skin/human_skin.py b/sfaira/data/human/skin/human_skin.py deleted file mode 100644 index db470536f..000000000 --- a/sfaira/data/human/skin/human_skin.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_skin_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_skin_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupSkin(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) - 
except ImportError: - pass diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py deleted file mode 100644 index 1e17922c3..000000000 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ /dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'skin' - self.sub_tissue = 'FetalSkin' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 
'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Kidney intercalated cell': 'Kidney intercalated cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py deleted file mode 100644 index f05d0e8e8..000000000 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ 
/dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'skin' - self.sub_tissue = 'FetalSkin' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Kidney intercalated cell': 'Kidney intercalated cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 
'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/spinalcord/__init__.py b/sfaira/data/human/spinalcord/__init__.py deleted file mode 100644 index 449651e79..000000000 --- a/sfaira/data/human/spinalcord/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_spinalcord import DatasetGroupSpinalcord diff --git a/sfaira/data/human/spinalcord/external.py b/sfaira/data/human/spinalcord/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/spinalcord/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import 
DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spinalcord/human_spinalcord.py b/sfaira/data/human/spinalcord/human_spinalcord.py deleted file mode 100644 index 386ee4d15..000000000 --- a/sfaira/data/human/spinalcord/human_spinalcord.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_spinalcord_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupSpinalcord(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpinalcord - self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py deleted file mode 100644 index afa0de19e..000000000 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'spinalcord' - self.sub_tissue = 'FetalSpinalCord' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spinalcord", "hcl_FetalSpinalCord_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/spleen/__init__.py b/sfaira/data/human/spleen/__init__.py deleted file mode 100644 index 88890eff3..000000000 --- a/sfaira/data/human/spleen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_spleen import DatasetGroupSpleen diff --git a/sfaira/data/human/spleen/external.py b/sfaira/data/human/spleen/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/spleen/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spleen/human_spleen.py b/sfaira/data/human/spleen/human_spleen.py deleted file mode 100644 index 5a6f95804..000000000 --- a/sfaira/data/human/spleen/human_spleen.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_spleen_2019_10x_madissoon_001 import Dataset as Dataset0001 -from .human_spleen_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_spleen_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupSpleen(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py deleted file mode 100644 index ab6dc0b09..000000000 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ /dev/null @@ -1,95 +0,0 @@ 
-import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2019_10x_madissoon_001_10.1101/741405" - self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" - self.download_website_meta = None - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": { - "B_Hypermutation": "B_Hypermutation", - "B_T_doublet": "B_T_doublet", - "B_follicular": "B_follicular", - "B_mantle": "B_mantle", - "CD34_progenitor": "CD34_progenitor", - "DC_1": "DC_1", - "DC_2": "DC_2", - "DC_activated": "DC_activated", - "DC_plasmacytoid": "DC_plasmacytoid", - "ILC": "ILC", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "NK_CD160pos": "NK_CD160pos", - "NK_FCGR3Apos": "NK_FCGR3Apos", - "NK_dividing": "NK_dividing", - "Plasma_IgG": "Plasma_IgG", - "Plasma_IgM": "Plasma_IgM", - "Plasmablast": "Plasmablast", - "Platelet": "Platelet", - "T_CD4_conv": "T_CD4_conv", - "T_CD4_fh": "T_CD4_fh", - "T_CD4_naive": "T_CD4_naive", - "T_CD4_reg": "T_CD4_reg", - "T_CD8_CTL": "T_CD8_CTL", - "T_CD8_MAIT": "T_CD8_MAIT", - "T_CD8_activated": "T_CD8_activated", - "T_CD8_gd": "T_CD8_gd", - "T_cell_dividing": "Proliferating T cell", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", 
"spleen.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] - self.set_unkown_class_id(ids=["Unknown"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7463846', - new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py deleted file mode 100644 index dbcc0eb87..000000000 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Spleen' - self.sub_tissue = 'AdultSpleen' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Erythroid cell': 'Erythroid cell', - 'Monocyte': 'Monocyte', - 'Endothelial cell': 'Endothelial cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Proliferating T cell': 'Proliferating T cell', - 'Fibroblast': 'Fibroblast', - 'Stromal cell': 'Stromal cell', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Mast cell': 'Mast cell', - 'Smooth muscle cell': 'Smooth muscle cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleenParenchyma_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py deleted file mode 100644 index 4c44116aa..000000000 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Spleen' - self.sub_tissue = 'AdultSpleen' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Erythroid cell': 'Erythroid cell', - 'Monocyte': 'Monocyte', - 'Endothelial cell': 'Endothelial cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Proliferating T cell': 'Proliferating T cell', - 'Fibroblast': 'Fibroblast', - 'Stromal cell': 'Stromal cell', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Mast cell': 'Mast cell', - 'Smooth muscle cell': 'Smooth muscle cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleen_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/__init__.py b/sfaira/data/human/stomach/__init__.py deleted file mode 100644 index 0ba1dc728..000000000 --- a/sfaira/data/human/stomach/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_stomach import DatasetGroupStomach diff --git a/sfaira/data/human/stomach/external.py b/sfaira/data/human/stomach/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/stomach/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/stomach/human_stomach.py b/sfaira/data/human/stomach/human_stomach.py deleted file mode 100644 index 537ea659a..000000000 --- a/sfaira/data/human/stomach/human_stomach.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_stomach_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_stomach_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_stomach_2020_microwell_han_003 import Dataset as Dataset0003 -from 
.human_stomach_2020_microwell_han_004 import Dataset as Dataset0004 -from .human_stomach_2020_microwell_han_005 import Dataset as Dataset0005 -from .human_stomach_2020_microwell_han_006 import Dataset as Dataset0006 -from .human_stomach_2020_microwell_han_007 import Dataset as Dataset0007 -from .human_stomach_2020_microwell_han_008 import Dataset as Dataset0008 -from .human_stomach_2020_microwell_han_009 import Dataset as Dataset0009 -from .human_stomach_2020_microwell_han_010 import Dataset as Dataset0010 - - -class DatasetGroupStomach(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py deleted file mode 100644 index d9100c776..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py deleted file mode 100644 index b5e9fe7e0..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalStomach' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py deleted file mode 100644 index c1a2d80e0..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py deleted file mode 100644 index 5dc7e5944..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntetsine_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py deleted file mode 100644 index 9c779b667..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalStomach' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py deleted file mode 100644 index 90f12d3c8..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py deleted file mode 100644 index c583b2bf7..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py deleted file mode 100644 index 041004ec7..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py deleted file mode 100644 index 935271988..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py deleted file mode 100644 index 5cc789fa6..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thymus/__init__.py b/sfaira/data/human/thymus/__init__.py deleted file mode 100644 index 1d0720e38..000000000 --- a/sfaira/data/human/thymus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_thymus import DatasetGroupThymus diff --git a/sfaira/data/human/thymus/external.py b/sfaira/data/human/thymus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/thymus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thymus/human_thymus.py b/sfaira/data/human/thymus/human_thymus.py deleted file mode 100644 index c8d5da0ad..000000000 --- a/sfaira/data/human/thymus/human_thymus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_thymus_2020_10x_park_001 import Dataset as Dataset0001 -from .human_thymus_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_thymus_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupThymus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = 
dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py deleted file mode 100644 index 295656f14..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ /dev/null @@ -1,108 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" - self.download_website = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" - self.download_website_meta = None - self.organ = "thymus" - self.sub_tissue = "fetal thymus" - self.annotated = True - - self.class_maps = { - "0": { - 'B_memory': 'B_memory', - 'B_naive': 'B_naive', - 'B_plasma': 'B_plasma', - 'B_pro/pre': 'B_pro/pre', - 'CD4+T': 'CD4+T', - 'CD4+Tmem': 'CD4+Tmem', - 'CD8+T': 'CD8+T', - 'CD8+Tmem': 'CD8+Tmem', - 'CD8αα': 'CD8αα', - 'DC1': 'DC1', - 'DC2': 'DC2', - 'DN': 'DN', - 'DP': 'DP', - 'ETP': 'ETP', - 'Endo': 'Endo', - 'Epi_GCM2': 'Epi_GCM2', - 'Ery': 'Ery', - 'Fb_1': 'Fb_1', - 'Fb_2': 'Fb_2', - 'Fb_cycling': 'Fb_cycling', - 'ILC3': 'ILC3', - 'Lymph': 'Lymph', - 'Mac': 'Mac', - 'Mast': 'Mast', - 'Mgk': 'Mgk', - 'Mono': 'Mono', - 'NK': 'NK', - 'NKT': 'NKT', - 'NMP': 'NMP', - 'T(agonist)': 'T(agonist)', - 
'TEC(myo)': 'TEC(myo)', - 'TEC(neuro)': 'TEC(neuro)', - 'Treg': 'Treg', - 'VSMC': 'VSMC', - 'aDC': 'aDC', - 'cTEC': 'cTEC', - 'mTEC(I)': 'mTEC(I)', - 'mTEC(II)': 'mTEC(II)', - 'mTEC(III)': 'mTEC(III)', - 'mTEC(IV)': 'mTEC(IV)', - 'mcTEC': 'mcTEC', - 'pDC': 'pDC', - 'αβT(entry)': 'alpha_beta_T(entry)', - 'γδT': 'gamma_delta_T', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1126/science.aay3224" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py deleted file mode 100644 index e50d8bf2d..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os 
-from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'thymus' - self.sub_tissue = 'FetalThymus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Erythroid cell': 'Ery', - 'Erythroid progenitor cell (RP high)': 'Ery', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Macrophage': 'Mac', - 'Monocyte': 'Mono', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'Proliferating T cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
'10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py deleted file mode 100644 index b0a97e9de..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'thymus' - self.sub_tissue = 'FetalThymus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Erythroid cell': 'Ery', - 'Erythroid progenitor cell (RP high)': 'Ery', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Macrophage': 'Mac', - 'Monocyte': 'Mono', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'Proliferating T cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
- self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thyroid/__init__.py b/sfaira/data/human/thyroid/__init__.py deleted file mode 100644 index 5a20c5cab..000000000 --- a/sfaira/data/human/thyroid/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_thyroid import DatasetGroupThyroid diff --git a/sfaira/data/human/thyroid/external.py b/sfaira/data/human/thyroid/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/thyroid/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thyroid/human_thyroid.py b/sfaira/data/human/thyroid/human_thyroid.py deleted file mode 100644 index a30ad5c92..000000000 --- a/sfaira/data/human/thyroid/human_thyroid.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_thyroid_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_thyroid_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupThyroid(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThyroid - 
self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py deleted file mode 100644 index 65472f513..000000000 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'thyroid' - self.sub_tissue = 'AdultThyroid' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py deleted file mode 100644 index 69cc04769..000000000 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'thyroid' - self.sub_tissue = 'AdultThyroid' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/trachea/__init__.py b/sfaira/data/human/trachea/__init__.py deleted file mode 100644 index 4778fdb5e..000000000 --- a/sfaira/data/human/trachea/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_trachea import DatasetGroupTrachea diff --git a/sfaira/data/human/trachea/external.py b/sfaira/data/human/trachea/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/trachea/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/trachea/human_trachea.py b/sfaira/data/human/trachea/human_trachea.py deleted file mode 100644 index f6b9578c2..000000000 --- a/sfaira/data/human/trachea/human_trachea.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_trachea_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupTrachea(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py deleted file mode 100644 index 25af6f305..000000000 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'trachea' - self.sub_tissue = 'AdultTrachea' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "trachea", "hcl_AdultTrachea_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/ureter/__init__.py b/sfaira/data/human/ureter/__init__.py deleted file mode 100644 index 0d19170e9..000000000 --- a/sfaira/data/human/ureter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_ureter import DatasetGroupUreter diff --git a/sfaira/data/human/ureter/external.py b/sfaira/data/human/ureter/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/ureter/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ureter/human_ureter.py b/sfaira/data/human/ureter/human_ureter.py deleted file mode 100644 index 7d3615eff..000000000 --- a/sfaira/data/human/ureter/human_ureter.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_ureter_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupUreter(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUreter - self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py deleted file mode 100644 index 4a9af9e70..000000000 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'ureter' - self.sub_tissue = 'AdultUreter' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ureter", "hcl_AdultUreter_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/uterus/__init__.py b/sfaira/data/human/uterus/__init__.py deleted file mode 100644 index 3c82a964f..000000000 --- a/sfaira/data/human/uterus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_uterus import DatasetGroupUterus diff --git a/sfaira/data/human/uterus/external.py b/sfaira/data/human/uterus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/uterus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/uterus/human_uterus.py b/sfaira/data/human/uterus/human_uterus.py deleted file mode 100644 index 9feee712d..000000000 --- a/sfaira/data/human/uterus/human_uterus.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_uterus_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupUterus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py deleted file mode 100644 index 35a9432e0..000000000 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ /dev/null @@ -1,62 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'uterus' - self.sub_tissue = 'AdultUterus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "uterus", "hcl_AdultUterus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/interactive/external.py b/sfaira/data/interactive/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/interactive/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 45e6b6d8a..77c1294ac 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -1,6 +1,7 @@ import anndata from typing import Union -from .external import DatasetBase + +from sfaira.data import DatasetBase class DatasetInteractive(DatasetBase): @@ -8,37 +9,71 @@ class DatasetInteractive(DatasetBase): def __init__( self, data: anndata.AnnData, - species: str, + organism: str, organ: str, gene_symbol_col: Union[str, None] = 'index', gene_ens_col: Union[str, None] = None, + obs_key_celltypes: Union[str, None] = None, class_maps: dict = {}, - dataset_id: str = "interactive", - **kwargs + dataset_id: str = "interactive_dataset", + path: Union[str, None] = ".", + meta_path: Union[str, None] = ".", + cache_path: Union[str, None] = ".", ): """ + Load data set into sfaira data format. - :param data: - :param species: - :param organ: - :param class_maps: - :param id: - :param kwargs: + :param data: Data set. + :param organism: Organism of data set. + :param organ: Organ of data set. + :param gene_symbol_col: Column name in .var which contains gene symbols. Set to "index" to use the index. + :param gene_ens_col: Column name in .var which contains ENSG symbols. Set to "index" to use the index. + :param obs_key_celltypes: .obs column name which contains cell type labels. + :param class_maps: Cell type class maps. + :param dataset_id: Identifer of data set. 
+ :param path: + :param meta_path: + :param cache_path: """ - DatasetBase.__init__(self=self, path=None, meta_path=None, **kwargs) - self.adata = data - self.species = species + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path) self.id = dataset_id + + self.author = "interactive_dataset" + self.doi = "interactive_dataset" + + self.download = "." + self.download_meta = "." + + # self.age # not currently supported + # self.dev_stage # not currently supported + # self.ethnicity # not currently supported + # self.healthy # not currently supported + # self.normalisation # not currently supported self.organ = organ + self.organism = organism + # self.protocol # not currently supported + # self.sex # not currently supported + # self.state_exact # not currently supported + # self.year # not currently supported - self.gene_symbol_col = gene_symbol_col - self.gene_ensg_col = gene_ens_col + self.obs_key_cellontology_original = obs_key_celltypes + + # self.obs_key_age # not currently supported + # self.obs_key_dev_stage # not currently supported + # self.obs_key_ethnicity # not currently supported + # self.obs_key_healthy # not currently supported + # self.obs_key_organ # not currently supported + # self.obs_key_organism # not currently supported + # self.obs_key_protocol # not currently supported + # self.obs_key_sex # not currently supported + # self.obs_key_state_exact # not currently supported + + self.var_symbol_col = gene_symbol_col + self.var_ensembl_col = gene_ens_col self.class_maps = class_maps + self.adata = data + def _load(self, fn=None): - self._convert_and_set_var_names( - symbol_col=self.gene_symbol_col, - ensembl_col=self.gene_ensg_col, - new_index='ensembl' - ) + pass diff --git a/sfaira/data/mouse/__init__.py b/sfaira/data/mouse/__init__.py deleted file mode 100644 index f1063a851..000000000 --- a/sfaira/data/mouse/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from .bladder import DatasetGroupBladder -from .brain import DatasetGroupBrain 
-from .diaphragm import DatasetGroupDiaphragm -from .adipose import DatasetGroupAdipose -from .heart import DatasetGroupHeart -from .kidney import DatasetGroupKidney -from .colon import DatasetGroupColon -from .muscle import DatasetGroupMuscle -from .liver import DatasetGroupLiver -from .lung import DatasetGroupLung -from .mammarygland import DatasetGroupMammaryGland -from .bone import DatasetGroupBone -from .femalegonad import DatasetGroupFemalegonad -from .pancreas import DatasetGroupPancreas -from .placenta import DatasetGroupPlacenta -from .blood import DatasetGroupBlood -from .prostate import DatasetGroupProstate -from .rib import DatasetGroupRib -from .ileum import DatasetGroupIleum -from .skin import DatasetGroupSkin -from .spleen import DatasetGroupSpleen -from .stomach import DatasetGroupStomach -from .malegonad import DatasetGroupMalegonad -from .thymus import DatasetGroupThymus -from .tongue import DatasetGroupTongue -from .trachea import DatasetGroupTrachea -from .uterus import DatasetGroupUterus diff --git a/sfaira/data/mouse/adipose/__init__.py b/sfaira/data/mouse/adipose/__init__.py deleted file mode 100644 index c23acef29..000000000 --- a/sfaira/data/mouse/adipose/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_adipose import DatasetGroupAdipose \ No newline at end of file diff --git a/sfaira/data/mouse/adipose/external.py b/sfaira/data/mouse/adipose/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/adipose/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/adipose/mouse_adipose.py b/sfaira/data/mouse/adipose/mouse_adipose.py deleted file mode 100644 index 3493d3d21..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_adipose_2019_10x_pisco_001 import Dataset 
as Dataset0001 -from .mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 -from .mouse_adipose_2019_smartseq2_pisco_003 import Dataset as Dataset0004 -from .mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 - - -class DatasetGroupAdipose(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py deleted file mode 100644 index 7c72f9c4e..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = 
"https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - self.adata.raw = None - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py deleted file mode 100644 index faad974eb..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py deleted file mode 100644 index 1e386e00c..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif 
self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py deleted file mode 100644 index 782caa53a..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm 
= {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py deleted file mode 100644 index 656d4e504..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = 
"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/__init__.py b/sfaira/data/mouse/bladder/__init__.py deleted file mode 100644 index c868320bb..000000000 --- a/sfaira/data/mouse/bladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_bladder import DatasetGroupBladder \ No newline at end of file diff --git a/sfaira/data/mouse/bladder/external.py b/sfaira/data/mouse/bladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/bladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/bladder/mouse_bladder.py b/sfaira/data/mouse/bladder/mouse_bladder.py deleted file mode 100644 index 93d08570a..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_bladder_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_bladder_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_bladder_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupBladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git 
a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py deleted file mode 100644 index 011fa3759..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": { - "Endothelial cell_Ly6c1 high(Bladder)": 'endothelial cell', - "Vascular endothelial cell(Bladder)": 'endothelial cell', - 'Urothelium(Bladder)': 'bladder urothelial cell', - 'Dendritic cell_Cd74 high(Bladder)': 'dendritic cell', - 'Dendritic cell_Lyz2 high(Bladder)': 'dendritic cell', - 'Macrophage_Pf4 high(Bladder)': 'macrophage', - 'NK cell(Bladder)': 'NK cell', - 'Basal epithelial cell(Bladder)': 'basal epithelial cell', - 'Epithelial cell_Upk3a high(Bladder)': 'epithelial cell', - 'Epithelial cell_Gm23935 high(Bladder)': 'epithelial cell', - 'Mesenchymal stromal cell(Bladder)': 'mesenchymal stromal cell', - 'Stromal cell_Dpt high(Bladder)': 'stromal cell', - 'Stromal cell_Car3 high(Bladder)': 'stromal cell', - 'Smooth muscle cell(Bladder)': 'smooth muscle cell', - 'Vascular smooth muscle progenitor cell(Bladder)': 'smooth muscle cell', - 'Umbrella cell(Bladder)': 'umbrella cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in 
load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py deleted file mode 100644 index c1483afba..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ 
/dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", "Bladder_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py deleted file mode 100644 index a1e1f6500..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,68 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/__init__.py b/sfaira/data/mouse/blood/__init__.py deleted file mode 100644 index 6b0e27f4f..000000000 --- a/sfaira/data/mouse/blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_blood import DatasetGroupBlood \ No newline at end of file diff --git a/sfaira/data/mouse/blood/external.py b/sfaira/data/mouse/blood/external.py deleted file mode 100644 index 
cc51e6fda..000000000 --- a/sfaira/data/mouse/blood/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/blood/mouse_blood.py b/sfaira/data/mouse/blood/mouse_blood.py deleted file mode 100644 index e1dfaf511..000000000 --- a/sfaira/data/mouse/blood/mouse_blood.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 -from .mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 -from .mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 - - -class DatasetGroupBlood (DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py deleted file mode 100644 index aa373428b..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class 
Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") - 
fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py deleted file mode 100644 index 9088bd83a..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from 
.external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"PeripheralBlood2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py deleted file mode 100644 index fcd971a98..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas 
-from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", 
"temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py deleted file mode 100644 index 204613af0..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as 
np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = 
os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py deleted file mode 100644 index 33efff1d5..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py +++ /dev/null @@ -1,85 +0,0 @@ 
-import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or 
path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/__init__.py b/sfaira/data/mouse/bone/__init__.py deleted file mode 100644 index 9b6ccd006..000000000 --- a/sfaira/data/mouse/bone/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_bone import DatasetGroupBone \ No newline 
at end of file diff --git a/sfaira/data/mouse/bone/external.py b/sfaira/data/mouse/bone/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/bone/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/bone/mouse_bone.py b/sfaira/data/mouse/bone/mouse_bone.py deleted file mode 100644 index ff7f50125..000000000 --- a/sfaira/data/mouse/bone/mouse_bone.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_bone_2018_microwell_001 import Dataset as Dataset0003 - - -class DatasetGroupBone(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py deleted file mode 100644 index f6e14e5cf..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - 
): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igkc high(Bone-Marrow)': 'naive B cell', - 'Dendritic cell_H2-Eb1 high(Bone-Marrow)': 'dendritic cell', - 'Dendritic cell_Siglech high(Bone-Marrow)': 'dendritic cell', - 'Macrophage_Ms4a6c high(Bone-Marrow)': 'macrophage', - 'Macrophage_S100a4 high(Bone-Marrow)': 'macrophage', - 'Erythroblast(Bone-Marrow)': 'erythroid progenitor', - 'Mast cell(Bone-Marrow)': 'mast cell', - 'Monocyte_Mif high(Bone-Marrow)': 'monocyte', - 'Monocyte_Prtn3 high(Bone-Marrow)': 'monocyte', - 'Neutrophil progenitor(Bone-Marrow)': 'neutrophil progenitor', - 'Neutrophil_Cebpe high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Fcnb high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Mmp8 high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Ngp high(Bone-Marrow)': 'neutrophil', - 'Hematopoietic stem progenitor cell(Bone-Marrow)': 'hematopoietic precursor cell', - 'Pre-pro B cell(Bone-Marrow)': 'early pro-B cell', - 'T cell_Ms4a4b high(Bone-Marrow)': 'CD4-positive, alpha-beta T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs['Annotation'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py deleted file mode 100644 index 0eaae87b9..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - 
self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6e72e85db..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = 
self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/__init__.py b/sfaira/data/mouse/brain/__init__.py deleted file mode 100644 index 499b40bb0..000000000 --- a/sfaira/data/mouse/brain/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_brain import DatasetGroupBrain \ No newline at end of file diff --git a/sfaira/data/mouse/brain/external.py b/sfaira/data/mouse/brain/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/brain/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/brain/mouse_brain.py 
b/sfaira/data/mouse/brain/mouse_brain.py deleted file mode 100644 index e62393153..000000000 --- a/sfaira/data/mouse/brain/mouse_brain.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_brain_2019_smartseq2_pisco_001 import Dataset as Dataset0001 -from .mouse_brain_2019_smartseq2_pisco_002 import Dataset as Dataset0002 -from .mouse_brain_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_brain_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupBrain(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py deleted file mode 100644 index eda8e1088..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": { - 'Astroglial cell(Bergman glia)(Brain)': 'Bergmann glial cell', - 'Astrocyte_Atp1b2 high(Brain)': 'astrocyte', - 'Astrocyte_Mfe8 high(Brain)': 'astrocyte', - 'Astrocyte_Pla2g7 high(Brain)': 'astrocyte', - 'Granulocyte_Ngp high(Brain)': 'granulocyte', - 'Hypothalamic ependymal cell(Brain)': 'ependymal cell', - 'Macrophage_Klf2 high(Brain)': 'macrophage', - 'Macrophage_Lyz2 high(Brain)': 'macrophage', - 'Microglia(Brain)': 'microglial cell', - 'Myelinating oligodendrocyte(Brain)': 'oligodendrocyte', - 'Oligodendrocyte precursor cell(Brain)': 'oligodendrocyte precursor cell', - 'Neuron(Brain)': 'neuron', - 'Pan-GABAergic(Brain)': 'GABAergic cell', - 'Schwann cell(Brain)': 'schwann cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py deleted file mode 100644 index 31f7dafdc..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": { - 'Astroglial cell(Bergman glia)(Brain)': 'Bergmann glial cell', - 'Astrocyte_Atp1b2 high(Brain)': 'astrocyte', - 'Astrocyte_Mfe8 high(Brain)': 'astrocyte', - 'Astrocyte_Pla2g7 high(Brain)': 'astrocyte', - 'Granulocyte_Ngp high(Brain)': 'granulocyte', 
- 'Hypothalamic ependymal cell(Brain)': 'ependymal cell', - 'Macrophage_Klf2 high(Brain)': 'macrophage', - 'Macrophage_Lyz2 high(Brain)': 'macrophage', - 'Microglia(Brain)': 'microglial cell', - 'Myelinating oligodendrocyte(Brain)': 'oligodendrocyte', - 'Oligodendrocyte precursor cell(Brain)': 'oligodendrocyte precursor cell', - 'Neuron(Brain)': 'neuron', - 'Pan-GABAergic(Brain)': 'GABAergic cell', - 'Schwann cell(Brain)': 'schwann cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py deleted file mode 100644 index 04c86cb19..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ /dev/null @@ -1,86 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4" - self.download_website = \ - "www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" - self.download_website_meta = \ - "www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": { - "Microglia": "microglial cell", - "T/NKT cells": "CD8-positive, alpha-beta T cell", - "Monocytes": "monocyte" - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "matrix.mtx") - fn_barcodes = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "barcodes.tsv") - fn_var = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "genes.tsv") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "annot_fullAggr.csv") - - self.adata = anndata.read_mtx(fn) - self.adata = anndata.AnnData(self.adata.X.T) - var 
= pandas.read_csv(fn_var, sep="\t", header=None) - var.columns = ["ensembl", "name"] - obs_names = pandas.read_csv(fn_barcodes, sep="\t", header=None)[0].values - assert len(obs_names) == self.adata.shape[0] - assert var.shape[0] == self.adata.shape[1] - obs = pandas.read_csv(self.path + fn_meta) - - # Match annotation to raw data. - obs.index = obs["cell"].values - obs = obs.loc[[x in obs_names for x in obs.index], :] - idx_tokeep = np.where([x in obs.index for x in obs_names])[0] - self.adata = self.adata[idx_tokeep, :] - obs_names = obs_names[idx_tokeep] - idx_map = np.array([obs.index.tolist().index(x) for x in obs_names]) - self.adata = self.adata[idx_map, :] - obs_names = obs_names[idx_map] - - # Assign attributes - self.adata.obs_names = obs_names - self.adata.var = var - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - self.adata.obs = obs - assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Movahedi" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41593-019-0393-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py deleted file mode 100644 index 543ff2c65..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py deleted file mode 100644 index 16bed27d5..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = 
"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/__init__.py b/sfaira/data/mouse/colon/__init__.py deleted file mode 100644 index 8e57ba03e..000000000 --- a/sfaira/data/mouse/colon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_colon import DatasetGroupColon \ No newline at end of file diff --git a/sfaira/data/mouse/colon/external.py b/sfaira/data/mouse/colon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/colon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/colon/mouse_colon.py b/sfaira/data/mouse/colon/mouse_colon.py deleted file mode 100644 index 3a64a819a..000000000 --- a/sfaira/data/mouse/colon/mouse_colon.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupColon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py 
b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py deleted file mode 100644 index e7f4077d6..000000000 --- a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py deleted file mode 100644 index f1a8d3d71..000000000 --- a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise 
ValueError("source %s not recognized" % self.source) - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ - self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/diaphragm/__init__.py b/sfaira/data/mouse/diaphragm/__init__.py deleted file mode 100644 index a68701d73..000000000 --- a/sfaira/data/mouse/diaphragm/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_diaphragm import DatasetGroupDiaphragm \ No newline at end of file diff --git a/sfaira/data/mouse/diaphragm/external.py b/sfaira/data/mouse/diaphragm/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/diaphragm/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py deleted file mode 100644 index 0a78a5bfc..000000000 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_diaphragm_2019_smartseq2_pisco_001 import Dataset as Dataset0001 - - -class DatasetGroupDiaphragm(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupDiaphragm - self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py deleted file mode 100644 index 061e65439..000000000 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union 
-from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "diaphragm" - self.sub_tissue = "diaphragm" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/__init__.py b/sfaira/data/mouse/femalegonad/__init__.py deleted file mode 100644 index 6cca0c4d4..000000000 --- a/sfaira/data/mouse/femalegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_femalegonad import DatasetGroupFemalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/femalegonad/external.py b/sfaira/data/mouse/femalegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/femalegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py deleted file mode 100644 index e8b6fecf6..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupFemalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, 
datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py deleted file mode 100644 index e69c4b146..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" - self.sub_tissue = "femalegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Cumulus cell_Car14 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Nupr1 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Ube2c high(Ovary)': 'cumulus cell', - 'Granulosa cell_Inhba high(Ovary)': 'granulosa cell', - 'Granulosa cell_Kctd14 high(Ovary)': 'granulosa cell', - 'Large luteal cell(Ovary)': 'large luteal cell', - 'Macrophage_Lyz2 high(Ovary)': 'macrophage', - 'Marcrophage_Cd74 high(Ovary)': 'macrophage', - 'Ovarian surface epithelium cell(Ovary)': 'epithelial cell of ovarian surface', - 'Ovarian vascular surface endothelium cell(Ovary)': 'endothelial cell of ovarian surface', - 'Small luteal cell(Ovary)': 'small luteal cell', - 'Stroma cell (Ovary)': 'stromal cell', - 'Thecal cell(Ovary)': 'thecal cell', - 'luteal 
cells(Ovary)': 'luteal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py 
b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py deleted file mode 100644 index 776162f34..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" - self.sub_tissue = "femalegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Cumulus cell_Car14 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Nupr1 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Ube2c high(Ovary)': 'cumulus cell', - 'Granulosa cell_Inhba high(Ovary)': 'granulosa cell', - 'Granulosa cell_Kctd14 high(Ovary)': 'granulosa cell', - 'Large luteal cell(Ovary)': 'large luteal cell', - 'Macrophage_Lyz2 high(Ovary)': 'macrophage', - 'Marcrophage_Cd74 high(Ovary)': 'macrophage', - 'Ovarian surface epithelium cell(Ovary)': 'epithelial cell of ovarian surface', - 'Ovarian vascular surface endothelium cell(Ovary)': 'endothelial cell of ovarian surface', - 'Small luteal cell(Ovary)': 'small luteal cell', - 'Stroma cell (Ovary)': 'stromal cell', - 'Thecal cell(Ovary)': 'thecal cell', - 'luteal cells(Ovary)': 'luteal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/__init__.py b/sfaira/data/mouse/heart/__init__.py deleted file mode 100644 index be8480688..000000000 --- a/sfaira/data/mouse/heart/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_heart import DatasetGroupHeart \ No newline at end of file diff --git a/sfaira/data/mouse/heart/external.py b/sfaira/data/mouse/heart/external.py deleted file mode 100644 index cc51e6fda..000000000 --- 
a/sfaira/data/mouse/heart/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/heart/mouse_heart.py b/sfaira/data/mouse/heart/mouse_heart.py deleted file mode 100644 index 11b15636e..000000000 --- a/sfaira/data/mouse/heart/mouse_heart.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - - -from .mouse_heart_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_heart_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_heart_2019_smartseq2_pisco_002 import Dataset as Dataset0003 - - -class DatasetGroupHeart(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py deleted file mode 100644 index e10649e05..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_heart_2019_10x_pisco_001_10.1101/661728" - - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py deleted file mode 100644 index 721e03cee..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = 
anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py deleted file mode 100644 index 53900692f..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") - elif self.source == "figshare": - raise ValueError("not defined") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/__init__.py b/sfaira/data/mouse/ileum/__init__.py deleted file mode 100644 index 89c13450a..000000000 --- a/sfaira/data/mouse/ileum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_ileum import DatasetGroupIleum \ No newline at end of file diff --git a/sfaira/data/mouse/ileum/external.py b/sfaira/data/mouse/ileum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/ileum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/ileum/mouse_ileum.py b/sfaira/data/mouse/ileum/mouse_ileum.py deleted file mode 100644 index 0d01e26df..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupIleum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff 
--git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py deleted file mode 100644 index 5363e094c..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 
'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, 
"nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py deleted file mode 100644 index c96944ea9..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell 
villi', - 'Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py deleted file mode 100644 index b3e7390aa..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 
high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/kidney/__init__.py b/sfaira/data/mouse/kidney/__init__.py deleted file mode 100644 index 057f45e34..000000000 --- a/sfaira/data/mouse/kidney/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_kidney import DatasetGroupKidney \ No newline at end of file diff --git a/sfaira/data/mouse/kidney/external.py b/sfaira/data/mouse/kidney/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/kidney/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/kidney/mouse_kidney.py b/sfaira/data/mouse/kidney/mouse_kidney.py deleted file mode 100644 index 9f20d3dbb..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_kidney_2019_10x_pisco_001 import Dataset as Dataset0001 -from 
.mouse_kidney_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_kidney_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_kidney_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupKidney(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py deleted file mode 100644 index 7088e667f..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ /dev/null @@ -1,64 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Cell in cell cycle(Fetal_Kidney)': 'fetal proliferative cell', - 'Metanephric mesenchyme(Fetal_Kidney)': 'fetal mesenchymal cell' - }, - } - - def 
_load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py deleted file 
mode 100644 index 86ace4b56..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ /dev/null @@ -1,95 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Adipocyte(Fetal_Kidney)': 'fetal adipocyte', - 'B cell(Kidney)': 'B cell', - 'Dendritic cell_Ccr7 high(Kidney)': 'dendritic cell', - 'Dendritic cell_Cst3 high(Kidney)': 'dendritic cell', - 'Distal collecting duct principal cell_Cldn4 high(Kidney)': 'kidney collecting duct principal cell', - 'Distal collecting duct principal cell_Hsd11b2 high(Kidney)': 'kidney collecting duct principal cell', - 'Distal convoluted tubule_Pvalb high(Kidney)': 'kidney distal convoluted tubule epithelial cell', - 'Distal convoluted tubule_S100g high(Kidney)': 'kidney distal convoluted tubule epithelial cell', - 'Endothelial cell(Kidney)': 'fenestrated cell', - 'Epithelial cell_Cryab high(Kidney)': "epithelial cell", - 'Fenestrated endothelial cell_Plvap high(Kidney)': 'fenestrated cell', - 'Fenestrated endothelial cell_Tm4sf1 high(Kidney)': 'fenestrated cell', - 'Glomerular epithelial cell_Aldh1a2 high(Fetal_Kidney)': 'glomerular epithelial cell', - 'Intercalated cells of collecting duct_Aqp6 high(Kidney)': 'kidney collecting duct epithelial cell', - 'Intercalated cells of collecting duct_Slc26a4 high(Kidney)': 'kidney collecting duct epithelial cell', - 'Macrophage_Ccl4 high (Kidney)': 
'macrophage', - 'Macrophage_Lyz2 high(Kidney)': 'macrophage', - 'Metanephric mesenchyme(Fetal_Kidney)': 'fetal mesenchymal cell', - 'Neutrophil progenitor_S100a8 high(Kidney)': 'neutrophil progenitor', - 'Proximal tubule brush border cell(Kidney)': 'brush cell', - 'Proximal tubule cell_Cyp4a14 high(Kidney)': 'epithelial cell of proximal tubule', - 'Proximal tubule cell_Osgin1 high(Kidney)': 'epithelial cell of proximal tubule', - 'S1 proximal tubule cells(Kidney)': 'epithelial cell of proximal tubule', - 'S3 proximal tubule cells(Kidney)': 'epithelial cell of proximal tubule', - 'Stromal cell_Ankrd1 high(Kidney)': 'fibroblast', - 'Stromal cell_Cxcl10 high(Kidney)': 'fibroblast', - 'Stromal cell_Dcn high(Kidney)': 'fibroblast', - 'Stromal cell_Mgp high(Fetal_Kidney)': 'fibroblast', - 'Stromal cell_Mgp high(Kidney)': 'fibroblast', - 'Stromal cell_Ptgds high(Kidney)': 'fibroblast', - 'T cell(Kidney)': 'T cell', - 'Thick ascending limb of the loop of Henle(Kidney)': 'kidney loop of Henle ascending limb epithelial cell', - 'Ureteric epithelium(Kidney)': 'ureteric epithelial cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py deleted file mode 100644 index 67cd214f8..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise 
ValueError("source %s not recognized" % self.source) - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan", "kidney cell"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py deleted file mode 100644 index 2e05e00b5..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") - else: - raise ValueError("source %s not recognized" % 
self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/__init__.py b/sfaira/data/mouse/liver/__init__.py deleted file mode 100644 index 4ddaa4d26..000000000 --- a/sfaira/data/mouse/liver/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_liver import DatasetGroupLiver \ No newline at end of file diff --git a/sfaira/data/mouse/liver/external.py b/sfaira/data/mouse/liver/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/liver/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git 
a/sfaira/data/mouse/liver/mouse_liver.py b/sfaira/data/mouse/liver/mouse_liver.py deleted file mode 100644 index 0d166852e..000000000 --- a/sfaira/data/mouse/liver/mouse_liver.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_liver_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_liver_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_liver_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_liver_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupLiver(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py deleted file mode 100644 index 5a856484d..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website 
= "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Fcmr high(Liver)': 'B cell', - 'B cell_Jchain high(Liver)': 'B cell', - 'Dendritic cell_Cst3 high(Liver)': 'dendritic cell', - 'Dendritic cell_Siglech high(Liver)': 'dendritic cell', - 'Endothelial cell(Liver)': 'endothelial cell of hepatic sinusoid', - 'Epithelial cell(Liver)': "duct epithelial cell", - 'Epithelia cell_Spp1 high(Liver)': "duct epithelial cell", - 'Erythroblast_Hbb-bs high(Liver)': 'erythroblast', - 'Erythroblast_Hbb-bt high(Liver)': 'erythroblast', - 'Granulocyte(Liver)': 'granulocyte', - 'Hepatocyte_Fabp1 high(Liver)': 'hepatocyte', - 'Hepatocyte_mt-Nd4 high(Liver)': 'hepatocyte', - 'Pericentral (PC) hepatocytes(Liver)': 'hepatocyte', - 'Periportal (PP) hepatocyte(Liver)': 'hepatocyte', - 'Kuppfer cell(Liver)': 'Kupffer cell', - 'Macrophage_Chil3 high(Liver)': 'macrophage', - 'Neutrophil_Ngp high(Liver)': 'neutrophil', - 'Stromal cell(Liver)': 'stromal cell', - 'T cell_Gzma high(Liver)': 'T cell', - 'T cell_Trbc2 high(Liver)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py deleted file mode 100644 index c1f4f3b2c..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - 
self.class_maps = { - "0": { - 'B cell_Jchain high(Liver)': 'B cell', - 'Dendritic cell_Cst3 high(Liver)': 'dendritic cell', - 'Dendritic cell_Siglech high(Liver)': 'dendritic cell', - 'Epithelial cell(Liver)': "duct epithelial cell", - 'Epithelia cell_Spp1 high(Liver)': "duct epithelial cell", - 'Erythroblast_Hbb-bs high(Liver)': 'erythroblast', - 'Hepatocyte_Fabp1 high(Liver)': 'hepatocyte', - 'Pericentral (PC) hepatocytes(Liver)': 'hepatocyte', - 'Periportal (PP) hepatocyte(Liver)': 'hepatocyte', - 'Kuppfer cell(Liver)': 'Kupffer cell', - 'Macrophage_Chil3 high(Liver)': 'macrophage', - 'Stromal cell(Liver)': 'stromal cell', - 'T cell_Gzma high(Liver)': 'T cell', - 'T cell_Trbc2 high(Liver)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py deleted file mode 100644 index e8366603e..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", 
"tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py deleted file mode 100644 index 73067787a..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - 
def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/__init__.py b/sfaira/data/mouse/lung/__init__.py deleted file mode 100644 index dafc0bfc6..000000000 --- a/sfaira/data/mouse/lung/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_lung import DatasetGroupLung \ No newline at end of file diff --git a/sfaira/data/mouse/lung/external.py b/sfaira/data/mouse/lung/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/lung/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/lung/mouse_lung.py b/sfaira/data/mouse/lung/mouse_lung.py deleted file mode 100644 index 9aea13353..000000000 --- a/sfaira/data/mouse/lung/mouse_lung.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_lung_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_lung_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_lung_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_lung_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_lung_2018_microwell_han_003 import Dataset as Dataset0005 - - -class DatasetGroupLung(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - 
Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py deleted file mode 100644 index 62bebd82c..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic 
cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py deleted file mode 100644 index bff170a68..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 
'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - 
celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py deleted file mode 100644 index a45337414..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - 
path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 
'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py deleted file mode 100644 index eabf196af..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py deleted file mode 100644 index 77b2fa934..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = 
"https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True 
- self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/__init__.py b/sfaira/data/mouse/malegonad/__init__.py deleted file mode 100644 index a56dbc2f4..000000000 --- a/sfaira/data/mouse/malegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_malegonad import DatasetGroupMalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/malegonad/external.py b/sfaira/data/mouse/malegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/malegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad.py b/sfaira/data/mouse/malegonad/mouse_malegonad.py deleted file mode 100644 index e1818bc07..000000000 --- a/sfaira/data/mouse/malegonad/mouse_malegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupMalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py deleted file mode 100644 
index acc94d1ed..000000000 --- a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" - self.sub_tissue = "malegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongating spermatid(Testis)': 'elongating spermatid', - 'Erythroblast_Hbb-bs high(Testis)': 'erythroblast', - 'Leydig cell(Testis)': 'leydig cell', - 'Macrophage_Lyz2 high(Testis)': 'macrophage', - 'Pre-Sertoli cell_Cst9 high(Testis)': 'pre-sertoli cell', - 'Pre-Sertoli cell_Ctsl high(Testis)': 'pre-sertoli cell', - 'Preleptotene spermatogonia(Testis)': 'preleptotene spermatogonia', - 'Sertoli cell(Testis)': 'sertoli cell', - 'Spermatids_1700016P04Rik high(Testis)': 'spermatid', - 'Spermatids_Cst13 high(Testis)': 'spermatid', - 'Spermatids_Hmgb4 high(Testis)': 'spermatid', - 'Spermatids_Tnp1 high(Testis)': 'spermatid', - 'Spermatocyte_1700001F09Rik high(Testis)': 'spermatocyte', - 'Spermatocyte_Cabs1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Calm2 high(Testis)': 'spermatocyte', - 'Spermatocyte_Mesp1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Slc2a3 high(Testis)': 'spermatocyte', - 'Spermatogonia_1700001P01Rik high(Testis)': 'spermatogonia', - 'Spermatogonia_Tbc1d23 high(Testis)': 'spermatogonia' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py deleted file mode 100644 index 7e5d1feea..000000000 --- 
a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" - self.sub_tissue = "malegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongating spermatid(Testis)': 'elongating spermatid', - 'Erythroblast_Hbb-bs high(Testis)': 'erythroblast', - 'Leydig cell(Testis)': 'leydig cell', - 'Macrophage_Lyz2 high(Testis)': 'macrophage', - 'Pre-Sertoli cell_Cst9 high(Testis)': 'pre-sertoli cell', - 'Pre-Sertoli cell_Ctsl high(Testis)': 'pre-sertoli cell', - 'Preleptotene spermatogonia(Testis)': 'preleptotene spermatogonia', - 'Sertoli cell(Testis)': 'sertoli cell', - 'Spermatids_1700016P04Rik high(Testis)': 'spermatid', - 'Spermatids_Cst13 high(Testis)': 'spermatid', - 'Spermatids_Hmgb4 high(Testis)': 'spermatid', - 'Spermatids_Tnp1 high(Testis)': 'spermatid', - 'Spermatocyte_1700001F09Rik high(Testis)': 'spermatocyte', - 'Spermatocyte_Cabs1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Calm2 high(Testis)': 'spermatocyte', - 'Spermatocyte_Mesp1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Slc2a3 high(Testis)': 'spermatocyte', - 'Spermatogonia_1700001P01Rik high(Testis)': 'spermatogonia', - 'Spermatogonia_Tbc1d23 high(Testis)': 'spermatogonia' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = 
os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/__init__.py b/sfaira/data/mouse/mammarygland/__init__.py deleted file mode 100644 index 6a42b03d9..000000000 --- a/sfaira/data/mouse/mammarygland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_mammarygland import DatasetGroupMammaryGland \ No 
newline at end of file diff --git a/sfaira/data/mouse/mammarygland/external.py b/sfaira/data/mouse/mammarygland/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/mammarygland/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py deleted file mode 100644 index 71fca2ff3..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 -from .mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 - - -class DatasetGroupMammaryGland(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMammaryGland - self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git 
a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py deleted file mode 100644 index c9c33b8b7..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 
high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 
"healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py deleted file mode 100644 index 63d95e9d1..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal 
cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py deleted file mode 100644 index 59e75d075..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK 
cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py deleted file mode 100644 index 85f714899..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 
'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py deleted file mode 100644 index ba044e9f2..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", 
"mammarygland", "Mammary_Gland_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py deleted file mode 100644 index acddb5514..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - 
source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ - self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/__init__.py b/sfaira/data/mouse/muscle/__init__.py deleted file mode 100644 index fa8cb5cfd..000000000 --- a/sfaira/data/mouse/muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_muscle import DatasetGroupMuscle \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/external.py b/sfaira/data/mouse/muscle/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/muscle/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/muscle/mouse_muscle.py b/sfaira/data/mouse/muscle/mouse_muscle.py deleted file mode 100644 index 1bf0eddcc..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupMuscle(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x 
in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py deleted file mode 100644 index 53ad4269d..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Jchain high(Muscle)': 'B cell', - 'B cell_Vpreb3 high(Muscle)': 'B cell', - 'Dendritic cell(Muscle)': 'dendritic cell', - 'Endothelial cell(Muscle)': 'endothelial cell', - 'Erythroblast_Car1 high(Muscle)': 'erythroblast', - 'Erythroblast_Car2 high(Muscle)': 'erythroblast', - 'Granulocyte monocyte progenitor cell(Muscle)': 'monocyte progenitor', - 'Macrophage_Ms4a6c high(Muscle)': 'macrophage', - 'Macrophage_Retnla high(Muscle)': 'macrophage', - 'Muscle cell_Tnnc1 high(Muscle)': 'muscle cell', - 'Muscle cell_Tnnc2 high(Muscle)': 'muscle cell', - 'Muscle progenitor cell(Muscle)': 'skeletal muscle satellite cell', - 'Neutrophil_Camp high(Muscle)': 'neutrophil', - 'Neutrophil_Prg2 
high(Muscle)': 'neutrophil', - 'Neutrophil_Retnlg high(Muscle)': 'neutrophil', - 'Stromal cell(Muscle)': 'stromal cell', - 'T cell(Muscle)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None) - diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py deleted file mode 100644 index e3a9b589b..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" 
- self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py deleted file mode 100644 index e16e3911c..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise 
ValueError("source %s not recognized" % self.source) - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/mouse/pancreas/__init__.py b/sfaira/data/mouse/pancreas/__init__.py deleted file mode 100644 index 27d79134c..000000000 --- a/sfaira/data/mouse/pancreas/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_pancreas import DatasetGroupPancreas \ No newline at end of file diff --git a/sfaira/data/mouse/pancreas/external.py b/sfaira/data/mouse/pancreas/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/pancreas/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas.py b/sfaira/data/mouse/pancreas/mouse_pancreas.py deleted file mode 100644 index ea87d9d50..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_pancreas_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_pancreas_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_pancreas_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_pancreas_2019_10x_thompson_001 import Dataset as Dataset0004 -from .mouse_pancreas_2019_10x_thompson_002 import Dataset as Dataset0005 -from .mouse_pancreas_2019_10x_thompson_003 import Dataset as Dataset0006 -from .mouse_pancreas_2019_10x_thompson_004 import Dataset as Dataset0007 -from .mouse_pancreas_2019_10x_thompson_005 import Dataset as Dataset0008 -from .mouse_pancreas_2019_10x_thompson_006 import Dataset as Dataset0009 -from .mouse_pancreas_2019_10x_thompson_007 import Dataset as Dataset0010 -from .mouse_pancreas_2019_10x_thompson_008 import Dataset as Dataset0011 - - -class DatasetGroupPancreas(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), 
- Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path), - Dataset0011(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py deleted file mode 100644 index 71901248d..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ /dev/null @@ -1,86 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'Acinar cell(Pancreas)': 'pancreatic acinar cell', - 'Dendrtic cell(Pancreas)': 'dendritic cell', - 'Ductal cell(Pancreas)': 'pancreatic ductal cell', - 'Endocrine cell(Pancreas)': "endocrine cell", - 'Dividing cell(Pancreas)': "endocrine cell", - 'Endothelial 
cell_Fabp4 high(Pancreas)': 'endothelial cell', - 'Endothelial cell_Lrg1 high(Pancreas)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Pancreas)': 'endothelial cell', - 'Erythroblast_Hbb-bt high(Pancreas)': 'erythroblast', - 'Erythroblast_Igkc high(Pancreas)': 'erythroblast', - 'Granulocyte(Pancreas)': 'granulocyte', - 'Macrophage_Ly6c2 high(Pancreas)': 'macrophage', - 'Macrophage(Pancreas)': 'macrophage', - 'Glial cell(Pancreas)': 'glial cell', - 'Smooth muscle cell_Acta2 high(Pancreas)': 'smooth muscle cell', - 'Smooth muscle cell_Rgs5 high(Pancreas)': 'smooth muscle cell', - 'Stromal cell_Fn1 high(Pancreas)': 'stromal cell', - 'Stromal cell_Mfap4 high(Pancreas)': 'stromal cell', - 'Stromal cell_Smoc2 high(Pancreas)': 'stromal cell', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'β-cell(Pancreas)': "pancreatic B cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Pancreas_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" 
- self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py deleted file mode 100644 index fd4e43323..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is None: 
- if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py deleted file mode 100644 index 433ff3de1..000000000 --- 
a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_001_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] 
= "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py deleted file mode 100644 index 378006b63..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_002_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 
'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py deleted file mode 100644 index f7773faf1..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_003_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - 
self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py deleted file mode 100644 index f3117cba6..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - 
DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_004_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py deleted file mode 100644 index a30f30d8b..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_005_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 
'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py deleted file mode 100644 index 
3c1873c96..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_006_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py deleted file mode 100644 index d619bf262..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_007_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { 
- 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py deleted file mode 100644 index 5747594ff..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_008_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", 
"GSM3308553_NOD_16w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py deleted file mode 100644 index 8f17750bf..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - 
meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/data/mouse/placenta/__init__.py b/sfaira/data/mouse/placenta/__init__.py deleted file mode 100644 index f8363fb61..000000000 --- a/sfaira/data/mouse/placenta/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_placenta import DatasetGroupPlacenta \ No newline at end of file diff --git a/sfaira/data/mouse/placenta/external.py b/sfaira/data/mouse/placenta/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/placenta/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/placenta/mouse_placenta.py b/sfaira/data/mouse/placenta/mouse_placenta.py deleted file mode 100644 index 3c885a94f..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_placenta_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_placenta_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupPlacenta(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - 
meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py deleted file mode 100644 index a1b4c6a35..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "placenta" - self.sub_tissue = "placenta" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Placenta)': 'B cell', - 'Basophil(Placenta)': 'basophil', - 'Decidual stromal cell(Placenta)': 'decidual stromal cell', - 'Dendritic cell(Placenta)': 'dendritic cell', - 'Endodermal cell_Afp high(Placenta)': 'endodermal cell', - 'Endothelial cell_Maged2 high(Placenta)': 'endothelial cell', - 'Erythroblast_Hbb-y high(Placenta)': 'erythroblast', - 'Granulocyte monocyte progenitors(Placenta)': 'monocyte progenitor', - 'Granulocyte_Neat1 high(Placenta)': 'granulocyte', - 'Granulocyte_S100a9 high(Placenta)': 'granulocyte', - 
'HSPC_Lmo2 high(Placenta)': 'HSPC', - 'Invasive spongiotrophoblast(Placenta)': 'invasive spongiotrophoblast', - 'Labyrinthine trophoblast(Placenta)': 'labyrinthine trophoblast', - 'Macrophage_Apoe high(Placenta)': 'macrophage', - 'Macrophage_Spp1 high(Placenta)': 'macrophage', - 'Megakaryocyte progenitor cell(Placenta)': 'megakaryocte', - 'Monocyte(Placenta)': 'monocyte', - 'NK cell(Placenta)': 'NK cell', - 'NKT cell(Placenta)': 'NKT cell', - 'PE lineage cell_Gkn2 high(Placenta)': 'PE lineage cell', - 'PE lineage cell_S100g high(Placenta)': 'PE lineage cell', - 'Progenitor trophoblast_Gjb3 high(Placenta)': 'trophoblast progenitor', - 'Spiral artery trophoblast giant cells(Placenta)': 'spiral artery trophoblast giant cells', - 'Spongiotrophoblast_Hsd11b2 high(Placenta)': 'spongiotrophoblast', - 'Spongiotrophoblast_Phlda2 high(Placenta)': 'spongiotrophoblast', - 'Stromal cell(Placenta)': 'stromal cell', - 'Stromal cell_Acta2 high(Placenta)': 'stromal cell', - 'Trophoblast progenitor_Taf7l high(Placenta)': 'trophoblast progenitor', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py deleted file mode 100644 index 871354a1d..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "placenta" - self.sub_tissue = "placenta" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Placenta)': 'B cell', - 'Basophil(Placenta)': 'basophil', - 'Decidual stromal 
cell(Placenta)': 'decidual stromal cell', - 'Dendritic cell(Placenta)': 'dendritic cell', - 'Endodermal cell_Afp high(Placenta)': 'endodermal cell', - 'Endothelial cell_Maged2 high(Placenta)': 'endothelial cell', - 'Erythroblast_Hbb-y high(Placenta)': 'erythroblast', - 'Granulocyte monocyte progenitors(Placenta)': 'monocyte progenitor', - 'Granulocyte_Neat1 high(Placenta)': 'granulocyte', - 'Granulocyte_S100a9 high(Placenta)': 'granulocyte', - 'HSPC_Lmo2 high(Placenta)': 'HSPC', - 'Invasive spongiotrophoblast(Placenta)': 'invasive spongiotrophoblast', - 'Labyrinthine trophoblast(Placenta)': 'labyrinthine trophoblast', - 'Macrophage_Apoe high(Placenta)': 'macrophage', - 'Macrophage_Spp1 high(Placenta)': 'macrophage', - 'Megakaryocyte progenitor cell(Placenta)': 'megakaryocte', - 'Monocyte(Placenta)': 'monocyte', - 'NK cell(Placenta)': 'NK cell', - 'NKT cell(Placenta)': 'NKT cell', - 'PE lineage cell_Gkn2 high(Placenta)': 'PE lineage cell', - 'PE lineage cell_S100g high(Placenta)': 'PE lineage cell', - 'Progenitor trophoblast_Gjb3 high(Placenta)': 'trophoblast progenitor', - 'Spiral artery trophoblast giant cells(Placenta)': 'spiral artery trophoblast giant cells', - 'Spongiotrophoblast_Hsd11b2 high(Placenta)': 'spongiotrophoblast', - 'Spongiotrophoblast_Phlda2 high(Placenta)': 'spongiotrophoblast', - 'Stromal cell(Placenta)': 'stromal cell', - 'Stromal cell_Acta2 high(Placenta)': 'stromal cell', - 'Trophoblast progenitor_Taf7l high(Placenta)': 'trophoblast progenitor', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - 
self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/__init__.py b/sfaira/data/mouse/prostate/__init__.py deleted file mode 100644 index 2f35afd48..000000000 --- a/sfaira/data/mouse/prostate/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_prostate import DatasetGroupProstate \ No newline at end of file diff --git a/sfaira/data/mouse/prostate/external.py b/sfaira/data/mouse/prostate/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/prostate/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git 
a/sfaira/data/mouse/prostate/mouse_prostate.py b/sfaira/data/mouse/prostate/mouse_prostate.py deleted file mode 100644 index bd16b0e46..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_prostate_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_prostate_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupProstate(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py deleted file mode 100644 index 486ba9b5c..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True 
- - self.class_maps = { - "0": { - 'Dendritic cell(Prostate)': 'dendritic cell', - 'Epithelial cell(Prostate)': 'epithelial cell', - 'Glandular epithelium(Prostate)': 'glandular epithelial cell', - 'Prostate gland cell(Prostate)': 'glandular cell', - 'Stromal cell(Prostate)': 'stromal cell', - 'T cell(Prostate)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py deleted file mode 100644 index 9c5a357d7..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True - - self.class_maps = { - "0": { - 'Dendritic cell(Prostate)': 'dendritic cell', - 'Epithelial cell(Prostate)': 'epithelial cell', - 'Glandular epithelium(Prostate)': 'glandular epithelial cell', - 'Prostate gland cell(Prostate)': 'glandular cell', - 'Stromal cell(Prostate)': 'stromal cell', - 'T cell(Prostate)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = 
anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/__init__.py b/sfaira/data/mouse/rib/__init__.py deleted file mode 100644 index d8f73f181..000000000 --- a/sfaira/data/mouse/rib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_rib import DatasetGroupRib \ No newline at end of file diff --git a/sfaira/data/mouse/rib/external.py b/sfaira/data/mouse/rib/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/rib/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/rib/mouse_rib.py 
b/sfaira/data/mouse/rib/mouse_rib.py deleted file mode 100644 index 1320a5e8c..000000000 --- a/sfaira/data/mouse/rib/mouse_rib.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_rib_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_rib_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_rib_2018_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupRib(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py deleted file mode 100644 index 358d4b053..000000000 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { 
- 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = 
celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py deleted file mode 100644 index 1099a3f6c..000000000 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = 
pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py deleted file mode 100644 index e1a4f8264..000000000 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, 
meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", 
"temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/skin/__init__.py b/sfaira/data/mouse/skin/__init__.py deleted file mode 100644 index 232177187..000000000 --- a/sfaira/data/mouse/skin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_skin import DatasetGroupSkin \ No newline at end of file diff --git a/sfaira/data/mouse/skin/external.py b/sfaira/data/mouse/skin/external.py deleted file mode 100644 index cc51e6fda..000000000 --- 
a/sfaira/data/mouse/skin/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/skin/mouse_skin.py b/sfaira/data/mouse/skin/mouse_skin.py deleted file mode 100644 index dba84de3a..000000000 --- a/sfaira/data/mouse/skin/mouse_skin.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_skin_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_skin_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupSkin(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py deleted file mode 100644 index 6d1889244..000000000 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = 
"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "skin" - self.sub_tissue = "skin" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6b8f05078..000000000 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "skin" - self.sub_tissue = "skin" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - 
self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/__init__.py b/sfaira/data/mouse/spleen/__init__.py deleted file mode 100644 index 90981c60b..000000000 --- a/sfaira/data/mouse/spleen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_spleen import DatasetGroupSpleen \ No newline at end of file diff --git a/sfaira/data/mouse/spleen/external.py b/sfaira/data/mouse/spleen/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/spleen/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/spleen/mouse_spleen.py b/sfaira/data/mouse/spleen/mouse_spleen.py deleted file mode 100644 index 05ff9439c..000000000 --- 
a/sfaira/data/mouse/spleen/mouse_spleen.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_spleen_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_spleen_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_spleen_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupSpleen(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py deleted file mode 100644 index 3eac25aa7..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": { - 'Erythroblast(Spleen)': 
'proerythroblast', - 'Dendritic cell_S100a4 high(Spleen)': 'dendritic cell', - 'Dendritic cell_Siglech high(Spleen)': 'dendritic cell', - 'Granulocyte(Spleen)': 'granulocyte', - 'Macrophage(Spleen)': 'macrophage', - 'Monocyte(Spleen)': 'monocyte', - 'NK cell(Spleen)': 'NK cell', - 'Neutrophil(Spleen)': 'neutrophil', - 'Plasma cell(Spleen)': 'plasma cell', - 'T cell(Spleen)': 'T cell', - 'Marginal zone B cell(Spleen)': 'B cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py deleted file mode 100644 index 3042be5fc..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_spleen_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == 
"aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py deleted file mode 100644 index d386c4609..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/stomach/__init__.py b/sfaira/data/mouse/stomach/__init__.py deleted file mode 100644 index f9f2d2ead..000000000 --- a/sfaira/data/mouse/stomach/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_stomach import DatasetGroupStomach \ No newline at end of file diff --git a/sfaira/data/mouse/stomach/external.py b/sfaira/data/mouse/stomach/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/stomach/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/stomach/mouse_stomach.py b/sfaira/data/mouse/stomach/mouse_stomach.py deleted file mode 100644 index 3fc8ba1ba..000000000 --- a/sfaira/data/mouse/stomach/mouse_stomach.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_stomach_2018_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupStomach(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py 
deleted file mode 100644 index 87cce385b..000000000 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "stomach" - self.sub_tissue = "stomach" - self.annotated = True - - self.class_maps = { - "0": { - 'Antral mucous cell (Stomach)': 'antral mucous cell', - 'Dendritic cell(Stomach)': 'dendritic cell', - 'Dividing cell(Stomach)': 'proliferative cell', - 'Epithelial cell_Gkn3 high(Stomach)': 'epithelial cell', - 'Epithelial cell_Krt20 high(Stomach)': 'epithelial cell', - 'Epithelial cell_Pla2g1b high(Stomach)': 'epithelial cell', - 'G cell(Stomach)': 'G cell', - 'Gastric mucosal cell(Stomach)': 'gastric mucosal cell', - 'Macrophage(Stomach)': 'macrophage', - 'Muscle cell(Stomach)': 'muscle cell', - 'Parietal cell (Stomach)': 'parietal cell', - 'Pit cell_Gm26917 high(Stomach)': 'pit cell', - 'Pit cell_Ifrd1 high(Stomach)': 'pit cell', - 'Stomach cell_Gkn2 high(Stomach)': 'stomach cell', - 'Stomach cell_Mt2 high(Stomach)': 'stomach cell', - 'Stomach cell_Muc5ac high(Stomach)': 'stomach cell', - 'Tuft cell(Stomach)': 'tuft cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", "Stomach_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/__init__.py b/sfaira/data/mouse/thymus/__init__.py deleted file mode 100644 index b01b604c8..000000000 --- a/sfaira/data/mouse/thymus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_thymus import DatasetGroupThymus \ No newline at end of file diff --git a/sfaira/data/mouse/thymus/external.py b/sfaira/data/mouse/thymus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- 
a/sfaira/data/mouse/thymus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/thymus/mouse_thymus.py b/sfaira/data/mouse/thymus/mouse_thymus.py deleted file mode 100644 index b74e325c1..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_thymus_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_thymus_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_thymus_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupThymus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py deleted file mode 100644 index 602777280..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - - self.class_maps = { - "0": { - 'abT cell(Thymus)': 'abT cell', - 'B cell(Thymus)': "B cell", - 'DPT cell(Thymus)': "double positive T cell", - 'gdT cell (Thymus)': 'gdT cell', - 'Pre T cell(Thymus)': 'immature T cell', - 'Proliferating thymocyte(Thymus)': "immature T cell", - 'T cell_Id2 high(Thymus)': 'abT cell', # TODO check, not sure about this gene - 'T cell_Ms4a4b high(Thymus)': 'abT cell' # TODO check, not sure about this gene - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py deleted file mode 100644 index 12bfbbe3e..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_thymus_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", 
"tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py deleted file mode 100644 index aed1fac7e..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase 
- - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/__init__.py b/sfaira/data/mouse/tongue/__init__.py deleted file mode 100644 index 63266537b..000000000 --- a/sfaira/data/mouse/tongue/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_tongue import DatasetGroupTongue \ No newline at end of file diff --git a/sfaira/data/mouse/tongue/external.py b/sfaira/data/mouse/tongue/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/tongue/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/tongue/mouse_tongue.py b/sfaira/data/mouse/tongue/mouse_tongue.py deleted file mode 100644 index a88388371..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_tongue_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_tongue_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupTongue(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in 
datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTongue - self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py deleted file mode 100644 index 0721142b2..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_tongue_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "tongue" - self.sub_tissue = "tongue" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if 
self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py deleted file mode 100644 index b076e08cf..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "tongue" - self.sub_tissue = "tongue" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/__init__.py b/sfaira/data/mouse/trachea/__init__.py deleted file mode 100644 index 3286064dd..000000000 --- a/sfaira/data/mouse/trachea/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_trachea import DatasetGroupTrachea \ No newline at end of file diff --git a/sfaira/data/mouse/trachea/external.py b/sfaira/data/mouse/trachea/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/trachea/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/trachea/mouse_trachea.py b/sfaira/data/mouse/trachea/mouse_trachea.py deleted file mode 100644 index b83539438..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_trachea_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_trachea_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupTrachea(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git 
a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py deleted file mode 100644 index fda917f76..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_trachea_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "trachea" - self.sub_tissue = "trachea" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py deleted file mode 100644 index e18bb8b6a..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = 
"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "trachea" - self.sub_tissue = "trachea" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/__init__.py b/sfaira/data/mouse/uterus/__init__.py deleted file mode 100644 index e84f043a1..000000000 --- a/sfaira/data/mouse/uterus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_uterus import DatasetGroupUterus \ No newline at end of file diff --git a/sfaira/data/mouse/uterus/external.py b/sfaira/data/mouse/uterus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/uterus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/uterus/mouse_uterus.py b/sfaira/data/mouse/uterus/mouse_uterus.py deleted file mode 100644 index aca5ad69d..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_uterus_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_uterus_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupUterus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py 
b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py deleted file mode 100644 index 58985d2a0..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "uterus" - self.sub_tissue = "uterus" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Uterus)': 'B cell', - 'Dendritic cell(Uterus)': 'dendritic cell', - 'Endothelial cell_Cldn5 high(Uterus)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Uterus)': 'endothelial cell', - 'Glandular epithelium_Ltf high(Uterus)': 'glandular epithelial cell', - 'Glandular epithelium_Sprr2f high(Uterus)': 'glandular epithelial cell', - 'Granulocyte(Uterus)': 'granulocyte', - 'Keratinocyte(Uterus)': 'keratinocyte', - 'Macrophage(Uterus)': 'macrophage', - 'Monocyte(Uterus)': 'monocyte', - 'Muscle cell_Mgp high(Uterus)': 'muscle cell', - 'Muscle cell_Pcp4 high(Uterus)': 'muscle cell', - 'Smooth muscle cell_Rgs5 high(Uterus)': 'smooth muscle cell', - 'NK cell(Uterus)': 'NK cell', - 'Stromal cell_Ccl11 high(Uterus)': 'stromal cell', - 'Stromal cell_Cxcl14 high(Uterus)': 'stromal cell', - 'Stromal cell_Gm23935 high(Uterus)': 'stromal cell', - 'Stromal cell_Has1 high(Uterus)': 'stromal cell', - 'Stromal cell_Hsd11b2 high(Uterus)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or 
path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py deleted file mode 100644 index dad5f7194..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ 
/dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "uterus" - self.sub_tissue = "uterus" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Uterus)': 'B cell', - 'Dendritic cell(Uterus)': 'dendritic cell', - 'Endothelial cell_Cldn5 high(Uterus)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Uterus)': 'endothelial cell', - 'Glandular epithelium_Ltf high(Uterus)': 'glandular epithelial cell', - 'Glandular epithelium_Sprr2f high(Uterus)': 'glandular epithelial cell', - 'Granulocyte(Uterus)': 'granulocyte', - 'Keratinocyte(Uterus)': 'keratinocyte', - 'Macrophage(Uterus)': 'macrophage', - 'Monocyte(Uterus)': 'monocyte', - 'Muscle cell_Mgp high(Uterus)': 'muscle cell', - 'Muscle cell_Pcp4 high(Uterus)': 'muscle cell', - 'Smooth muscle cell_Rgs5 high(Uterus)': 'smooth muscle cell', - 'NK cell(Uterus)': 'NK cell', - 'Stromal cell_Ccl11 high(Uterus)': 'stromal cell', - 'Stromal cell_Cxcl14 high(Uterus)': 'stromal cell', - 'Stromal cell_Gm23935 high(Uterus)': 'stromal cell', - 'Stromal cell_Has1 high(Uterus)': 'stromal cell', - 'Stromal cell_Hsd11b2 high(Uterus)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/utils/create_meta.py b/sfaira/data/utils/create_meta.py new file mode 100644 index 000000000..bb707ce16 --- /dev/null +++ b/sfaira/data/utils/create_meta.py @@ -0,0 +1,34 @@ +import sfaira +import sys +import tensorflow as tf + +print(tf.__version__) + + +def write_meta(args0, args1): + args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) + return None + + +# Set global variables. 
+print("sys.argv", sys.argv) + +path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +processes = int(str(sys.argv[3])) + +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_meta +) +dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. +dsg.load( + celltype_version=None, + annotated_only=False, + match_to_reference=None, + remove_gene_version=True, + load_raw=True, + allow_caching=False, + processes=processes, + func=write_meta, + kwargs_func={"args1": path_meta}, +) diff --git a/sfaira/data/utils/create_meta_human.py b/sfaira/data/utils/create_meta_human.py deleted file mode 100644 index 0ba7e49bb..000000000 --- a/sfaira/data/utils/create_meta_human.py +++ /dev/null @@ -1,62 +0,0 @@ -import sys -import tensorflow as tf -from sfaira.data import human - - -print(tf.__version__) - -# Set global variables. -print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) - -ds_dict = { - "adipose": human.DatasetGroupAdipose(path=path, meta_path=path_meta), - "adrenalgland": human.DatasetGroupAdrenalgland(path=path, meta_path=path_meta), - "mixed": human.DatasetGroupMixed(path=path, meta_path=path_meta), - "artery": human.DatasetGroupArtery(path=path, meta_path=path_meta), - "bladder": human.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": human.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": human.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": human.DatasetGroupBrain(path=path, meta_path=path_meta), - "calvaria": human.DatasetGroupCalvaria(path=path, meta_path=path_meta), - "cervix": human.DatasetGroupCervix(path=path, meta_path=path_meta), - "chorionicvillus": human.DatasetGroupChorionicvillus(path=path, meta_path=path_meta), - "colon": human.DatasetGroupColon(path=path, meta_path=path_meta), - "duodenum": human.DatasetGroupDuodenum(path=path, meta_path=path_meta), - "epityphlon": 
human.DatasetGroupEpityphlon(path=path, meta_path=path_meta), - "esophagus": human.DatasetGroupEsophagus(path=path, meta_path=path_meta), - "eye": human.DatasetGroupEye(path=path, meta_path=path_meta), - "fallopiantube": human.DatasetGroupFallopiantube(path=path, meta_path=path_meta), - "femalegonad": human.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "gallbladder": human.DatasetGroupGallbladder(path=path, meta_path=path_meta), - "heart": human.DatasetGroupHeart(path=path, meta_path=path_meta), - "hesc": human.DatasetGroupHesc(path=path, meta_path=path_meta), - "ileum": human.DatasetGroupIleum(path=path, meta_path=path_meta), - "jejunum": human.DatasetGroupJejunum(path=path, meta_path=path_meta), - "kidney": human.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": human.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": human.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": human.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "muscle": human.DatasetGroupMuscle(path=path, meta_path=path_meta), - "omentum": human.DatasetGroupOmentum(path=path, meta_path=path_meta), - "pancreas": human.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": human.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "pleura": human.DatasetGroupPleura(path=path, meta_path=path_meta), - "prostate": human.DatasetGroupProstate(path=path, meta_path=path_meta), - "rectum": human.DatasetGroupRectum(path=path, meta_path=path_meta), - "rib": human.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": human.DatasetGroupSkin(path=path, meta_path=path_meta), - "spinalcord": human.DatasetGroupSpinalcord(path=path, meta_path=path_meta), - "spleen": human.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": human.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": human.DatasetGroupThymus(path=path, meta_path=path_meta), - "thyroid": human.DatasetGroupThyroid(path=path, meta_path=path_meta), - 
"trachea": human.DatasetGroupTrachea(path=path, meta_path=path_meta), - "ureter": human.DatasetGroupUreter(path=path, meta_path=path_meta), - "uterus": human.DatasetGroupUterus(path=path, meta_path=path_meta), -} -for k in list(ds_dict.keys()): - for kk in ds_dict[k].ids: - ds_dict[k].datasets[kk].write_meta(dir_out=path_meta) diff --git a/sfaira/data/utils/create_meta_mouse.py b/sfaira/data/utils/create_meta_mouse.py deleted file mode 100644 index c2ab7c4ca..000000000 --- a/sfaira/data/utils/create_meta_mouse.py +++ /dev/null @@ -1,45 +0,0 @@ -import sys -import tensorflow as tf -from sfaira.data import mouse - - -print(tf.__version__) - -# Set global variables. -print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) - -ds_dict = { - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), - "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": 
mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), - "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), - "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta) -} -for k in list(ds_dict.keys()): - for kk in ds_dict[k].ids: - ds_dict[k].datasets[kk].write_meta(dir_out=path_meta) diff --git a/sfaira/data/utils/write_backed_human.py b/sfaira/data/utils/write_backed_human.py index 1788f5e36..ec7b98766 100644 --- a/sfaira/data/utils/write_backed_human.py +++ b/sfaira/data/utils/write_backed_human.py @@ -1,10 +1,7 @@ +import os +import sfaira import sys import tensorflow as tf -import sfaira -import os - -from sfaira.data import human - print(tf.__version__) @@ -16,56 +13,10 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") - -ds_dict = { - "adipose": human.DatasetGroupAdipose(path=path, meta_path=path_meta), - "adrenalgland": human.DatasetGroupAdrenalgland(path=path, meta_path=path_meta), - "mixed": human.DatasetGroupMixed(path=path, meta_path=path_meta), - "artery": human.DatasetGroupArtery(path=path, meta_path=path_meta), - "bladder": human.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": human.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": human.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": human.DatasetGroupBrain(path=path, meta_path=path_meta), - "calvaria": human.DatasetGroupCalvaria(path=path, meta_path=path_meta), - "cervix": human.DatasetGroupCervix(path=path, meta_path=path_meta), - 
"chorionicvillus": human.DatasetGroupChorionicvillus(path=path, meta_path=path_meta), - "colon": human.DatasetGroupColon(path=path, meta_path=path_meta), - "duodenum": human.DatasetGroupDuodenum(path=path, meta_path=path_meta), - "epityphlon": human.DatasetGroupEpityphlon(path=path, meta_path=path_meta), - "esophagus": human.DatasetGroupEsophagus(path=path, meta_path=path_meta), - "eye": human.DatasetGroupEye(path=path, meta_path=path_meta), - "fallopiantube": human.DatasetGroupFallopiantube(path=path, meta_path=path_meta), - "femalegonad": human.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "gallbladder": human.DatasetGroupGallbladder(path=path, meta_path=path_meta), - "heart": human.DatasetGroupHeart(path=path, meta_path=path_meta), - "hesc": human.DatasetGroupHesc(path=path, meta_path=path_meta), - "ileum": human.DatasetGroupIleum(path=path, meta_path=path_meta), - "jejunum": human.DatasetGroupJejunum(path=path, meta_path=path_meta), - "kidney": human.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": human.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": human.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": human.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "muscle": human.DatasetGroupMuscle(path=path, meta_path=path_meta), - "omentum": human.DatasetGroupOmentum(path=path, meta_path=path_meta), - "pancreas": human.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": human.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "pleura": human.DatasetGroupPleura(path=path, meta_path=path_meta), - "prostate": human.DatasetGroupProstate(path=path, meta_path=path_meta), - "rectum": human.DatasetGroupRectum(path=path, meta_path=path_meta), - "rib": human.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": human.DatasetGroupSkin(path=path, meta_path=path_meta), - "spinalcord": human.DatasetGroupSpinalcord(path=path, meta_path=path_meta), - "spleen": 
human.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": human.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": human.DatasetGroupThymus(path=path, meta_path=path_meta), - "thyroid": human.DatasetGroupThyroid(path=path, meta_path=path_meta), - "trachea": human.DatasetGroupTrachea(path=path, meta_path=path_meta), - "ureter": human.DatasetGroupUreter(path=path, meta_path=path_meta), - "uterus": human.DatasetGroupUterus(path=path, meta_path=path_meta), -} -ds = sfaira.data.DatasetSuperGroup( - dataset_groups=[ds_dict[k] for k in list(ds_dict.keys())] +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_meta ) +ds.subset(key="organism", values=["human"]) ds.load_all_tobacked( fn_backed=fn, genome=genome, diff --git a/sfaira/data/utils/write_backed_mouse.py b/sfaira/data/utils/write_backed_mouse.py index a408380e9..e8397186b 100644 --- a/sfaira/data/utils/write_backed_mouse.py +++ b/sfaira/data/utils/write_backed_mouse.py @@ -1,10 +1,7 @@ +import os +import sfaira import sys import tensorflow as tf -import sfaira -import os - -from sfaira.data import mouse - print(tf.__version__) @@ -16,39 +13,10 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") - -ds_dict = { - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), - "mammarygland": 
mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), - "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), - "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta), -} -ds = sfaira.data.DatasetSuperGroup( - dataset_groups=[ds_dict[k] for k in list(ds_dict.keys())] +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_meta ) +ds.subset(key="organism", values=["mouse"]) ds.load_all_tobacked( fn_backed=fn, genome=genome, diff --git a/sfaira/estimators/__init__.py b/sfaira/estimators/__init__.py index 7e6f3ff03..cbed4e77b 100644 --- a/sfaira/estimators/__init__.py +++ b/sfaira/estimators/__init__.py @@ -1,5 +1,6 @@ from sfaira.estimators.keras import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype + try: - from sfaira_extension.estimators import * + from sfaira_extension.estimators import * # noqa: F403 except ImportError: pass diff --git a/sfaira/estimators/callbacks.py 
b/sfaira/estimators/callbacks.py index 5121b07a2..690c3d29f 100644 --- a/sfaira/estimators/callbacks.py +++ b/sfaira/estimators/callbacks.py @@ -46,15 +46,15 @@ def on_epoch_end(self, epoch, logs=None): # (epoch + 1, pseudo_inputs.max(), pseudo_inputs.mean(), pseudo_inputs.min())) if epoch == 199: lr = tf.keras.backend.get_value(self.model.optimizer.lr) - tf.keras.backend.set_value(self.model.optimizer.lr, lr/10) + tf.keras.backend.set_value(self.model.optimizer.lr, lr / 10) if self.verbose > 0: - print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr/10)) + print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr / 10)) if epoch == 249: lr = tf.keras.backend.get_value(self.model.optimizer.lr) - tf.keras.backend.set_value(self.model.optimizer.lr, lr/10) + tf.keras.backend.set_value(self.model.optimizer.lr, lr / 10) if self.verbose > 0: - print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr/10)) + print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr / 10)) if epoch == 299: self.model.stop_training = True diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 3e27959ef..19b6fb032 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ -from sfaira.versions.celltype_versions import SPECIES_DICT, CelltypeVersionsBase +from sfaira.versions.celltype_versions import ORGANISM_DICT, CelltypeVersionsBase from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 759eb3d85..a9a04d323 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -40,7 +40,7 @@ def __init__( model_dir: Union[str, None], model_id: Union[str, None], model_class: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], 
model_topology: Union[str, None], @@ -55,12 +55,12 @@ def __init__( self.model_dir = model_dir self.model_id = model_id self.model_class = model_class.lower() - self.species = species.lower() + self.organism = organism.lower() self.organ = organ.lower() self.model_type = model_type.lower() self.model_topology = model_topology self.topology_container = Topologies( - species=species, + organism=organism, model_class=model_class, model_type=model_type, topology_id=model_topology @@ -104,7 +104,7 @@ def load_pretrained_weights(self): ) fn = os.path.join(self.cache_path, f"{self.model_id}_weights.data-00000-of-00001") except HTTPError: - raise FileNotFoundError(f'cannot find remote weightsfile') + raise FileNotFoundError('cannot find remote weightsfile') else: # Local repo if not self.model_dir: @@ -474,7 +474,7 @@ def __init__( data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], model_topology: Union[str, None], @@ -482,16 +482,16 @@ def __init__( cache_path: str = os.path.join('cache', '') ): super(EstimatorKerasEmbedding, self).__init__( - data=data, - model_dir=model_dir, - model_id=model_id, - model_class="embedding", - species=species, - organ=organ, - model_type=model_type, - model_topology=model_topology, - weights_md5=weights_md5, - cache_path=cache_path + data=data, + model_dir=model_dir, + model_id=model_id, + model_class="embedding", + organism=organism, + organ=organ, + model_type=model_type, + model_topology=model_topology, + weights_md5=weights_md5, + cache_path=cache_path ) def init_model( @@ -793,7 +793,7 @@ def compute_gradients_input( idx = self.idx_test if self.idx_test is None: num_samples = 10000 - idx = np.random.randint(0,self.data.X.shape[0],num_samples) + idx = np.random.randint(0, self.data.X.shape[0], num_samples) n_obs = len(idx) else: idx = None @@ -820,14 +820,14 @@ def 
compute_gradients_input( self.model.training_model.input, self.model.encoder_model.output[0] ) - latent_dim = self.model.encoder_model.output[0].shape[1] + latent_dim = self.model.encoder_model.output[0].shape[1] input_dim = self.model.training_model.input[0].shape[1] else: model = tf.keras.Model( self.model.training_model.input, self.model.encoder_model.output ) - latent_dim = self.model.encoder_model.output[0].shape[0] + latent_dim = self.model.encoder_model.output[0].shape[0] input_dim = self.model.training_model.input[0].shape[1] @tf.function @@ -837,14 +837,16 @@ def get_gradients(x_batch): tape.watch(x) model_out = model((x, sf)) if abs_gradients: - f = lambda x: abs(x) + def f(x): + return abs(x) else: - f = lambda x: x + def f(x): + return x # marginalize on batch level and then accumulate batches # batch_jacobian gives output of size: (batch_size, latent_dim, input_dim) batch_gradients = f(tape.batch_jacobian(model_out, x)) return batch_gradients - + for step, (x_batch, y_batch) in tqdm(enumerate(ds), total=np.ceil(n_obs / batch_size)): batch_gradients = get_gradients(x_batch).numpy() _, y = y_batch @@ -857,11 +859,11 @@ def get_gradients(x_batch): if per_celltype: for cell in cell_names: print(f'{cell} with {counts[cell]} observations') - grads_x[cell] = grads_x[cell]/counts[cell] if counts[cell] > 0 else np.zeros((latent_dim, input_dim)) - + grads_x[cell] = grads_x[cell] / counts[cell] if counts[cell] > 0 else np.zeros((latent_dim, input_dim)) + return {'gradients': grads_x, 'counts': counts} else: - return grads_x/n_obs + return grads_x / n_obs class EstimatorKerasCelltype(EstimatorKeras): @@ -876,7 +878,7 @@ def __init__( data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], model_topology: Union[str, None], @@ -885,16 +887,16 @@ def __init__( max_class_weight: float = 1e3 ): 
super(EstimatorKerasCelltype, self).__init__( - data=data, - model_dir=model_dir, - model_id=model_id, - model_class="celltype", - species=species, - organ=organ, - model_type=model_type, - model_topology=model_topology, - weights_md5=weights_md5, - cache_path=cache_path + data=data, + model_dir=model_dir, + model_id=model_id, + model_class="celltype", + organism=organism, + organ=organ, + model_type=model_type, + model_topology=model_topology, + weights_md5=weights_md5, + cache_path=cache_path ) self.max_class_weight = max_class_weight @@ -916,7 +918,7 @@ def init_model( raise ValueError('unknown topology %s for EstimatorKerasCelltype' % self.model_type) self.model = Model( - species=self.species, + organism=self.organism, organ=self.organ, topology_container=self.topology_container, override_hyperpar=override_hyperpar @@ -1196,15 +1198,17 @@ def compute_gradients_input( ) for step, (x_batch, _, _) in enumerate(ds): - print("compute gradients wrt. input: batch %i / %i." % (step+1, np.ceil(n_obs / 64))) + print("compute gradients wrt. input: batch %i / %i." % (step + 1, np.ceil(n_obs / 64))) x = x_batch with tf.GradientTape(persistent=True) as tape: tape.watch(x) model_out = model(x) if abs_gradients: - f = lambda x: abs(x) + def f(x): + return abs(x) else: - f = lambda x: x + def f(x): + return x # marginalize on batch level and then accumulate batches # batch_jacobian gives output of size: (batch_size, latent_dim, input_dim) batch_gradients = f(tape.batch_jacobian(model_out, x).numpy()) diff --git a/sfaira/estimators/losses.py b/sfaira/estimators/losses.py index ee2fc03d0..ad46d3ef9 100644 --- a/sfaira/estimators/losses.py +++ b/sfaira/estimators/losses.py @@ -52,7 +52,7 @@ def call( """Implements the gaussian log likelihood loss as VAE reconstruction loss""" loc, scale = tf.split(y_pred, num_or_size_splits=2, axis=1) - ll = -tf.math.log(scale*tf.math.sqrt(2.*np.pi)) - 0.5*tf.math.square((y_true - loc) / scale) + ll = -tf.math.log(scale * tf.math.sqrt(2. 
* np.pi)) - 0.5 * tf.math.square((y_true - loc) / scale) ll = tf.clip_by_value(ll, -300, 300, "log_probs") neg_ll = -ll if self.average: diff --git a/sfaira/estimators/metrics.py b/sfaira/estimators/metrics.py index 74ee58b45..864eb0134 100644 --- a/sfaira/estimators/metrics.py +++ b/sfaira/estimators/metrics.py @@ -62,8 +62,8 @@ def __init__(self, name='acc_agg', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): phat_pos_agg = tf.reduce_sum(y_true * y_pred, axis=1, keepdims=True) acc_agg = tf.cast( - phat_pos_agg > tf.reduce_max((tf.ones_like(y_true) - y_true) * y_pred, axis=1), - dtype=y_true.dtype + phat_pos_agg > tf.reduce_max((tf.ones_like(y_true) - y_true) * y_pred, axis=1), + dtype=y_true.dtype ) # Do not use weighting for accuracy. self.acc_agg.assign_add(tf.reduce_mean(acc_agg)) @@ -86,8 +86,8 @@ def __init__(self, k: int, name='tpr', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): tp_by_class = tf.reduce_sum(tf.cast( - y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), - dtype=y_true.dtype + y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), + dtype=y_true.dtype ) * y_true, axis=0) fn_by_class = tf.reduce_sum(tf.cast( y_pred < tf.reduce_max(y_pred, axis=1, keepdims=True), @@ -147,8 +147,8 @@ def __init__(self, k: int, name='f1', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): tp_by_class = tf.reduce_sum(tf.cast( - y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), - dtype=y_true.dtype + y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), + dtype=y_true.dtype ) * y_true, axis=0) fp_by_class = tf.reduce_sum(tf.cast( y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), diff --git a/sfaira/genomes/generate_feature_list.py b/sfaira/genomes/generate_feature_list.py index 3cb73d555..838f53e87 100644 --- a/sfaira/genomes/generate_feature_list.py +++ b/sfaira/genomes/generate_feature_list.py @@ -5,11 +5,11 @@ class ExtractFeatureList: gene_table: Union[None, pandas.DataFrame] 
- species: Union[None, str] + organism: Union[None, str] release: Union[None, str] def __init__(self): - self.species = None + self.organism = None self.release = None self.gene_table = None @@ -39,7 +39,7 @@ def from_ensemble_gtf( :return: """ gtf_name = fn.split("/")[-1] - self.species = gtf_name.split(".")[0] + self.organism = gtf_name.split(".")[0] self.release = "_".join(gtf_name.split(".")[1:-1]) tab = pandas.read_table( @@ -63,4 +63,4 @@ def reduce_types_protein_coding(self): self.reduce_types(types=["protein_coding"]) def write_gene_table_to_csv(self, path): - self.gene_table.to_csv(path_or_buf=path + self.species + "_" + self.release + ".csv") + self.gene_table.to_csv(path_or_buf=path + self.organism + "_" + self.release + ".csv") diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py index 5e70f72b4..51dee4b72 100644 --- a/sfaira/interface/__init__.py +++ b/sfaira/interface/__init__.py @@ -1 +1,2 @@ +from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype from sfaira.interface.user_interface import UserInterface diff --git a/sfaira/interface/external.py b/sfaira/interface/external.py deleted file mode 100644 index fdb52e721..000000000 --- a/sfaira/interface/external.py +++ /dev/null @@ -1,5 +0,0 @@ -from sfaira.estimators import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype -import sfaira.versions.celltype_versions as celltype_versions -from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.topology_versions import Topologies -from sfaira.data.interactive import DatasetInteractive diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 94612b2d1..2aab74632 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -7,7 +7,8 @@ import pandas as pd from typing import List, Union -from .external import celltype_versions, Topologies +from sfaira.versions.celltype_versions import ORGANISM_DICT +from 
sfaira.versions.topology_versions import Topologies class ModelZoo(abc.ABC): @@ -18,7 +19,7 @@ class ModelZoo(abc.ABC): ontology: dict model_id: Union[str, None] model_class: Union[str, None] - species: Union[str, None] + organism: Union[str, None] organ: Union[str, None] model_class: Union[str, None] model_type: Union[str, None] @@ -37,7 +38,7 @@ def __init__( self.ontology = self.load_ontology_from_model_ids(model_lookuptable['model_id'].values) self.model_id = None self.model_class = None - self.species = None + self.organism = None self.organ = None self.model_type = None self.organisation = None @@ -80,7 +81,7 @@ def set_model_id( self.model_id = model_id ixs = self.model_id.split('_') self.model_class = ixs[0] - self.species = ixs[1] + self.organism = ixs[1] self.organ = ixs[2] self.model_type = ixs[3] self.organisation = ixs[4] @@ -88,7 +89,7 @@ def set_model_id( self.model_version = ixs[6] self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class=self.model_class, model_type=self.model_type, topology_id=self.model_topology @@ -129,114 +130,115 @@ def call_kipoi(self): with_dataloader=True ) # TODO make sure that this is in line with kipoi_experimental model names # alternatively: - #return kipoi_experimental.get_model("https://github.com/kipoi/models/tree/7d3ea7800184de414aac16811deba6c8eefef2b6/pwm_HOCOMOCO/human/CTCF", source='github-permalink') + # return kipoi_experimental.get_model("https://github.com/kipoi/models/tree/7d3ea7800184de414aac16811deba6c8eefef2b6/pwm_HOCOMOCO/human/CTCF", + # source='github-permalink') - def species(self) -> List[str]: + def organism(self) -> List[str]: """ - Return list of available species. + Return list of available organism. - :return: List of species available. + :return: List of organism available. """ return self.ontology.keys() def organs( self, - species: str + organism: str ) -> List[str]: """ - Return list of available organs for a given species. 
+ Return list of available organs for a given organism. - :param species: Identifier of species to show organs for. + :param organism: Identifier of organism to show organs for. :return: List of organs available. """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - return self.ontology[species].keys() + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + return self.ontology[organism].keys() def models( self, - species: str, + organism: str, organ: str ) -> List[str]: """ - Return list of available models for a given species, organ. + Return list of available models for a given organism, organ. - :param species: Identifier of species to show organs for. + :param organism: Identifier of organism to show organs for. :param organ: Identifier of organ to show versions for. :return: List of models available. """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - return self.ontology[species][organ].keys() + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + return self.ontology[organism][organ].keys() def organisation( self, - species: str, + organism: str, organ: str, model_type: str ) -> List[str]: """ - Return list of available organisation that trained a given model for a given species and organ + Return list of available organisation that trained a given model for a given organism and organ - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - return self.ontology[species][organ][model_type] + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + return self.ontology[organism][organ][model_type] def topology( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str ) -> List[str]: """ Return list of available model topologies that trained by a given organisation, - a given model for a given species and organ + a given model for a given organism and organ - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - return self.ontology[species][organ][model_type][organisation] + return self.ontology[organism][organ][model_type][organisation] def versions( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, model_topology: str ) -> List[str]: """ - Return list of available model versions of a given organisation for a given species and organ and model. + Return list of available model versions of a given organisation for a given organism and organ and model. - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :param model_topology: Identifier of model_topology to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" - return self.ontology[species][organ][model_type][organisation][model_topology] + return self.ontology[organism][organ][model_type][organisation][model_topology] @property def genome(self): @@ -260,7 +262,7 @@ class ModelZooEmbedding(ModelZoo): """ The supported model ontology is: - species -> organ -> model -> organisation -> topology -> version -> ID + organism -> organ -> model -> organisation -> topology -> version -> ID Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. 
""" @@ -279,12 +281,12 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'embedding'] id_df = pd.DataFrame( [i.split('_')[1:7] for i in ids], - columns=['species', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - species = np.unique(id_df['species']) - ontology = dict.fromkeys(species) - for g in species: - id_df_g = id_df[id_df.species == g] + organism = np.unique(id_df['organism']) + ontology = dict.fromkeys(organism) + for g in organism: + id_df_g = id_df[id_df.organism == g] organ = np.unique(id_df_g['organ']) ontology[g] = dict.fromkeys(organ) for o in organ: @@ -307,7 +309,7 @@ def load_ontology_from_model_ids( def set_latest( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, @@ -316,38 +318,38 @@ def set_latest( """ Set model ID to latest model in given ontology group. - :param species: Identifier of species to select. + :param organism: Identifier of organism to select. :param organ: Identifier of organ to select. :param model_type: Identifier of model_type to select. :param organisation: Identifier of organisation to select. 
:param model_topology: Identifier of model_topology to select :return: """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - species=species, + organism=organism, organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.species = species + self.organism = organism self.organ = organ self.model_type = model_type self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be species/organ specific later + self.model_topology = model_topology # set to model for now, could be organism/organ specific later self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'embedding', - self.species, + self.organism, self.organ, self.model_type, self.organisation, @@ -355,7 +357,7 @@ def set_latest( self.model_version ]) self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class="embedding", model_type=self.model_type, 
topology_id=self.model_topology @@ -366,7 +368,7 @@ class ModelZooCelltype(ModelZoo): """ The supported model ontology is: - species -> organ -> model -> organisation -> topology -> version -> ID + organism -> organ -> model -> organisation -> topology -> version -> ID Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. @@ -388,12 +390,12 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'celltype'] id_df = pd.DataFrame( [i.split('_')[1:7] for i in ids], - columns=['species', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - species = np.unique(id_df['species']) - ontology = dict.fromkeys(species) - for g in species: - id_df_g = id_df[id_df.species == g] + organism = np.unique(id_df['organism']) + ontology = dict.fromkeys(organism) + for g in organism: + id_df_g = id_df[id_df.organism == g] organ = np.unique(id_df_g['organ']) ontology[g] = dict.fromkeys(organ) for o in organ: @@ -416,7 +418,7 @@ def load_ontology_from_model_ids( def set_latest( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, @@ -425,39 +427,39 @@ def set_latest( """ Set model ID to latest model in given ontology group. - :param species: Identifier of species to select. + :param organism: Identifier of organism to select. :param organ: Identifier of organ to select. :param model_type: Identifier of model_type to select. :param organisation: Identifier of organisation to select. 
:param model_topology: Identifier of model_topology to select :return: """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - species=species, + organism=organism, organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.species = species + self.organism = organism self.organ = organ self.model_type = model_type self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be species/organ specific later + self.model_topology = model_topology # set to model for now, could be organism/organ specific later self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'celltype', - self.species, + self.organism, self.organ, self.model_type, self.organisation, @@ -465,9 +467,9 @@ def set_latest( self.model_version ]) self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class="celltype", model_type=self.model_type, 
topology_id=self.model_topology ) - self.celltypes = celltype_versions.SPECIES_DICT[self.species][self.organ].celltype_universe[self.model_version.split(".")[0]] + self.celltypes = ORGANISM_DICT[self.organism][self.organ].celltype_universe[self.model_version.split(".")[0]] diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index ef0e1ad70..a7b70a7de 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -9,8 +9,9 @@ from typing import List, Union import warnings -from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype, DatasetInteractive -from .model_zoo import ModelZooEmbedding, ModelZooCelltype +from sfaira.data import DatasetInteractive +from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype +from sfaira.interface.model_zoo import ModelZooEmbedding, ModelZooCelltype class UserInterface: @@ -25,8 +26,8 @@ class UserInterface: # initialise your sfaira instance with a model lookuptable. 
# instead of setting `custom_repo` when initialising the UI you can also use `sfaira_repo=True` to use public weights ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) - ui.zoo_embedding.set_latest(species, organ, model_type, organisation, model_topology) - ui.zoo_celltype.set_latest(species, organ, model_type, organisation, model_topology) + ui.zoo_embedding.set_latest(organism, organ, model_type, organisation, model_topology) + ui.zoo_celltype.set_latest(organism, organ, model_type, organisation, model_topology) ui.load_data(anndata.read("/path/to/file.h5ad")) # load your dataset into sfaira ui.load_model_embedding() ui.load_model_celltype() @@ -142,9 +143,9 @@ def write_lookuptable( if ids: pd.DataFrame( - list(zip(ids_cleaned, model_paths, file_paths, md5)), - columns=['model_id', 'model_path', 'model_file_path', 'md5'] - )\ + list(zip(ids_cleaned, model_paths, file_paths, md5)), + columns=['model_id', 'model_path', 'model_file_path', 'md5'] + )\ .sort_values('model_id')\ .reset_index(drop=True)\ .to_csv(os.path.join(repo_path, 'model_lookuptable.csv')) @@ -171,11 +172,17 @@ def deposit_zenodo( :param zenodo_access_token: Your personal Zenodo API access token. Create one here: https://zenodo.org/account/settings/applications/tokens/new/ :param title: Title of the Zenodo deposition - :param authors: List of dicts, where each dict defines one author (dict keys: name: Name of creator in the format "Family name, Given names", affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), gnd: GND identifier of creator (optional) + :param authors: List of dicts, where each dict defines one author (dict keys: + name: Name of creator in the format "Family name, Given names", + affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), + gnd: GND identifier of creator (optional) :param description: Description of the Zenodo deposition. 
- :param metadata: Dictionary with further metadata attributes of the deposit. See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation - :param publish: Set this to True to directly publish the weights on Zenodo. When set to False a draft will be created, which can be edited in the browser before publishing. - :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. + :param metadata: Dictionary with further metadata attributes of the deposit. + See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation + :param publish: Set this to True to directly publish the weights on Zenodo. + When set to False a draft will be created, which can be edited in the browser before publishing. + :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. + We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. """ import requests @@ -233,7 +240,7 @@ def deposit_zenodo( 'license': 'cc-by-4.0', 'upload_type': 'dataset', 'access_right': 'open' - } + } meta = {**meta_core, **metadata} r = requests.put(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}', params=params, @@ -269,22 +276,30 @@ def load_data( self, data: anndata.AnnData, gene_symbol_col: Union[str, None] = None, - gene_ens_col: Union[str, None] = None + gene_ens_col: Union[str, None] = None, + remove_gene_version: bool = True, + match_to_reference: Union[str, None] = None, ): """ Loads the provided AnnData object into sfaira. - If genes in the provided AnnData object are annotated as gene symbols, please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. 
- If genes in the provided AnnData object are annotated as ensembl ids, please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. + + If genes in the provided AnnData object are annotated as gene symbols, + please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. + If genes in the provided AnnData object are annotated as ensembl ids, + please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. You need to provide at least one of the two. :param data: AnnData object to load :param gene_symbol_col: Var column name (or 'index') which contains gene symbols :param gene_ens_col: ar column name (or 'index') which contains ensembl ids + :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different + data sets are superimposed. + :param match_to_reference: Reference genomes name. """ - if self.zoo_embedding.species is not None: - species = self.zoo_embedding.species + if self.zoo_embedding.organism is not None: + organism = self.zoo_embedding.organism organ = self.zoo_embedding.organ - elif self.zoo_celltype.species is not None: - species = self.zoo_celltype.species + elif self.zoo_celltype.organism is not None: + organism = self.zoo_celltype.organism organ = self.zoo_celltype.organ else: raise ValueError("Please first set which model_id to use via the model zoo before loading the data") @@ -293,13 +308,20 @@ def load_data( raise ValueError("Please provide either the gene_ens_col or the gene_symbol_col argument.") dataset = DatasetInteractive( - data=data, - species=species, - organ=organ, - gene_symbol_col=gene_symbol_col, - gene_ens_col=gene_ens_col - ) - dataset.load() + data=data, + organism=organism, + organ=organ, + gene_symbol_col=gene_symbol_col, + gene_ens_col=gene_ens_col + ) + dataset.load( + celltype_version=None, + fn=None, + remove_gene_version=remove_gene_version, + 
match_to_reference=match_to_reference, + load_raw=False, + allow_caching=False, + ) self.data = dataset.adata def filter_cells(self): @@ -326,7 +348,7 @@ def load_model_embedding(self): data=self.data, model_dir=model_dir, model_id=self.zoo_embedding.model_id, - species=self.zoo_embedding.species, + organism=self.zoo_embedding.organism, organ=self.zoo_embedding.organ, model_type=self.zoo_embedding.model_type, model_topology=self.zoo_embedding.model_topology, @@ -351,7 +373,7 @@ def load_model_celltype(self): data=self.data, model_dir=model_dir, model_id=self.zoo_celltype.model_id, - species=self.zoo_celltype.species, + organism=self.zoo_celltype.organism, organ=self.zoo_celltype.organ, model_type=self.zoo_celltype.model_type, model_topology=self.zoo_celltype.model_topology, diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index c5c2ca03d..ac8d4da39 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -11,6 +11,7 @@ class LearnedThresholdLayer(tf.keras.layers.Layer): """ A layer that thresholds the input with a learned threshold. """ + def __init__( self, out_dim, @@ -97,7 +98,7 @@ class CellTypeMarkerVersioned(CellTypeMarker): def __init__( self, - species: str, + organism: str, organ: str, topology_container: Topologies, override_hyperpar: Union[dict, None] = None @@ -110,8 +111,8 @@ def __init__( :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ - # Get cell type version instance based on topology ID, species and organ. - self.celltypes_version = celltype_versions.SPECIES_DICT[species.lower()][organ.lower()] + # Get cell type version instance based on topology ID, organism and organ. 
+ self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] self.celltypes_version.set_version(version=topology_container.topology_id) unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) @@ -119,8 +120,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - CellTypeMarker.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, **hyperpar @@ -131,7 +131,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index b94fc4226..9b296862c 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -73,7 +73,7 @@ class CellTypeMlpVersioned(CellTypeMlp): def __init__( self, - species: str, + organism: str, organ: str, topology_container: Topologies, override_hyperpar: Union[dict, None] = None @@ -86,8 +86,8 @@ def __init__( :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ - # Get cell type version instance based on topology ID, species and organ. - self.celltypes_version = celltype_versions.SPECIES_DICT[species.lower()][organ.lower()] + # Get cell type version instance based on topology ID, organism and organ. 
+ self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] self.celltypes_version.set_version(version=topology_container.topology_id) unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) @@ -95,8 +95,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - CellTypeMlp.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, **hyperpar @@ -107,7 +106,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index 27b4d069c..58fa5dc47 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -58,8 +58,8 @@ def __init__( self.layer_list.append(tf.keras.layers.Dropout(hid_drop, name='enc_%s_drop' % i)) def call(self, x, **kwargs): - for l in self.layer_list: - x = l(x) + for layer in self.layer_list: + x = layer(x) return x @@ -105,8 +105,8 @@ def __init__( self.layer_list.append(tf.keras.layers.Dropout(hid_drop, name='dec_%s_drop' % i)) def call(self, x, **kwargs): - for l in self.layer_list: - x = l(x) + for layer in self.layer_list: + x = layer(x) return x @@ -214,8 +214,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelAe.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -225,7 +224,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + 
list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 93fd33ee6..72ac6f8e2 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -109,8 +109,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelLinear.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -120,7 +119,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/output_layers.py b/sfaira/models/embedding/output_layers.py index d4192c7c6..65ac4a56a 100644 --- a/sfaira/models/embedding/output_layers.py +++ b/sfaira/models/embedding/output_layers.py @@ -3,6 +3,7 @@ class NegBinOutput(tf.keras.layers.Layer): """Negative binomial output layer""" + def __init__( self, original_dim=None, @@ -203,4 +204,4 @@ def call(self, inputs, **kwargs): invlinker_mean = mean_clip + sf invlinker_var = tf.exp(var_clip) - return [invlinker_mean, invlinker_var] \ No newline at end of file + return [invlinker_mean, invlinker_var] diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index b24b0d75e..f122d670b 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -161,7 +161,6 @@ def __init__( else: raise ValueError("len(latent_dim)=%i should be uneven to provide a defined bottleneck" % len(latent_dim)) - inputs_encoder = tf.keras.Input(shape=(in_dim,), name='counts') inputs_sf = tf.keras.Input(shape=(1,), name='size_factors') inputs_encoder_pp = PreprocInput()(inputs_encoder) @@ -237,8 +236,7 @@ def __init__( if 
override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVae.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -248,7 +246,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 3cc518fdc..80436e1a3 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -96,17 +96,17 @@ def call(self, inputs, **kwargs): class IAF(tf.keras.layers.Layer): def __init__( - self, - bottleneck: int, - n_iaf: int, - l1_coef: float, - l2_coef: float, - masking_dim=320, - n_made=2, - activation="relu", - name='iaf', - **kwargs - ): + self, + bottleneck: int, + n_iaf: int, + l1_coef: float, + l2_coef: float, + masking_dim=320, + n_made=2, + activation="relu", + name='iaf', + **kwargs + ): """ Transforms latent space with simple distribution to one with a more flexible one. 
@@ -241,7 +241,6 @@ def __init__( else: raise ValueError("len(latent_dim)=%i should be uneven to provide a defined bottleneck" % len(latent_dim)) - inputs_encoder = tf.keras.Input(shape=(in_dim,), name='counts') inputs_sf = tf.keras.Input(shape=(1,), name='size_factors') inputs_encoder_pp = PreprocInput()(inputs_encoder) @@ -255,7 +254,7 @@ def __init__( kernel_initializer=init ) iaf = IAF( - bottleneck=latent_dim[n_layers_enc-1], + bottleneck=latent_dim[n_layers_enc - 1], n_iaf=n_iaf, l1_coef=l1_coef, l2_coef=l2_coef @@ -284,8 +283,8 @@ def __init__( z, s_t_sigmas = iaf([z, h]) z_t_square_mc += tf.square(z) z_t_mean += z - z_t_square_mc = z_t_square_mc/mc_samples - z_t_mean = z_t_mean/mc_samples + z_t_square_mc = z_t_square_mc / mc_samples + z_t_mean = z_t_mean / mc_samples cum_s_t_log_var = 0 for s_t_sigma in s_t_sigmas: @@ -355,8 +354,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVaeIAF.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -366,7 +364,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index 55ce3bd47..db9d75c81 100644 --- a/sfaira/models/embedding/vaevamp.py +++ b/sfaira/models/embedding/vaevamp.py @@ -298,8 +298,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVaeVamp.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -310,7 +309,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - 
list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/made.py b/sfaira/models/made.py index fb08e2b67..eec724905 100644 --- a/sfaira/models/made.py +++ b/sfaira/models/made.py @@ -59,6 +59,7 @@ def __init__(self, units, out_units, self.kernel_constraint = constraints.get(kernel_constraint) self.bias_constraint = constraints.get(bias_constraint) self.batchnorm = batchnorm + def dropout_wrapper(self, inputs, training): if 0. < self.rate < 1.: def dropped_inputs(): diff --git a/sfaira/train/external.py b/sfaira/train/external.py deleted file mode 100644 index 158904c07..000000000 --- a/sfaira/train/external.py +++ /dev/null @@ -1,5 +0,0 @@ -from sfaira.versions.celltype_versions import SPECIES_DICT -from sfaira.data import DatasetGroupBase, DatasetSuperGroup -from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype -from sfaira.data import mouse, human diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 86119aa17..74cdc6cb9 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -6,10 +6,10 @@ import warnings from typing import Union, List import os -from .train_model import TargetZoos -from .external import SPECIES_DICT -from .external import EstimatorKerasEmbedding +from sfaira.train.train_model import TargetZoos +from sfaira.versions.celltype_versions import ORGANISM_DICT +from sfaira.estimators import EstimatorKerasEmbedding def _tp(yhat, ytrue): @@ -666,15 +666,15 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 
2] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], "model_type": [ "linear" if (id_i.split("_")[3] == "mlp" and id_i.split("_")[5].split(".")[1] == "0") else id_i.split("_")[3] @@ -682,10 +682,10 @@ def create_summary_tab(self): ], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids - }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + + }.items()) + # noqa: W504 + list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 + list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 + 
list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 list(dict([("all_" + m, [self.evals[x]["all"][m] for x in self.run_ids]) for m in metrics]).items()) )) if self.summary_tab.shape[0] == 0: @@ -808,7 +808,7 @@ def plot_best( fig, axs = plt.subplots(1, 1, figsize=(height_fig, width_fig)) with sns.axes_style("dark"): axs = sns.heatmap( - sns_data_heatmap, #mask=mask, + sns_data_heatmap, # mask=mask, annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, @@ -835,7 +835,7 @@ def plot_best_classwise_heatmap( Plot evaluation metric heatmap for specified organ by cell classes and model types. :param organ: Organ to plot in heatmap. - :param organism: Species that the gridsearch was run on + :param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository :param celltype_version: Version in sfaira celltype universe :param partition_select: Based on which partition to select the best model @@ -883,11 +883,11 @@ def plot_best_classwise_heatmap( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all() + raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = SPECIES_DICT.copy() + celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] @@ -900,7 +900,7 @@ def plot_best_classwise_heatmap( for leaf in ontology[k]: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1/len(ontology[k]) + cell_counts[leaf] += 1 / len(ontology[k]) del cell_counts[k] # Compute class-wise metrics @@ -998,7 +998,7 @@ def plot_best_classwise_scatter( Plot evaluation metric scatterplot for specified organ by cell classes and model types. :param organ: Organ to plot in heatmap. - :param organism: Species that the gridsearch was run on + :param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository :param celltype_version: Version in sfaira celltype universe :param partition_select: Based on which partition to select the best model @@ -1048,11 +1048,11 @@ def plot_best_classwise_scatter( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all() + raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = SPECIES_DICT.copy() + celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] @@ -1065,7 +1065,7 @@ def plot_best_classwise_scatter( for leaf in ontology[k]: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1/len(ontology[k]) + cell_counts[leaf] += 1 / len(ontology[k]) del cell_counts[k] # Compute class-wise metrics @@ -1175,23 +1175,31 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in 
self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "model_type": [id_i.split("_")[3] for id_i in self.run_ids], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids, - }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() else self.evals[x]["train"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() else self.evals[x]["test"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() else self.evals[x]["val"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("all_" + m, [self.evals[x]["all"][m] if m in self.evals[x]["all"].keys() else self.evals[x]["all"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + }.items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() + else self.evals[x]["train"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("test_" + m, 
[self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() + else self.evals[x]["test"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() + else self.evals[x]["val"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + list(dict([("all_" + m, [self.evals[x]["all"][m] if m in self.evals[x]["all"].keys() + else self.evals[x]["all"]['neg_ll_' + m] for x in self.run_ids]) + for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models )) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models @@ -1292,7 +1300,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), f"{partition_show}_{metric_show}" + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -1319,7 +1327,7 @@ def plot_best( fig, axs = plt.subplots(1, 1, figsize=(height_fig, width_fig)) with sns.axes_style("dark"): axs = sns.heatmap( - sns_data_heatmap, #mask=mask, + sns_data_heatmap, # mask=mask, annot=True, fmt=".2f", ax=axs, xticklabels=True, yticklabels=True, @@ -1381,18 +1389,17 @@ def get_gradients_by_celltype( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise (ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all(annotated_only=True) + raise (ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load(annotated_only=True) print('Compute gradients (2/3): load embedding') # load embedding adata = dataset.adata - topology = model_id embedding = EstimatorKerasEmbedding( data=adata, model_dir="", model_id="", - species=organism, + organism=organism, organ=organ, model_type=model_type, model_topology=model_id.split('_')[5] @@ -1471,10 +1478,10 @@ def plot_gradient_distr( if normalize: avg_grads[modelt] = np.abs(avg_grads[modelt]) avg_grads[modelt] = (avg_grads[modelt] - np.min(avg_grads[modelt], axis=1, keepdims=True)) / \ - np.maximum( - np.max(avg_grads[modelt], axis=1, keepdims=True) - np.min(avg_grads[modelt], - axis=1, - keepdims=True), 1e-8) + np.maximum( + np.max(avg_grads[modelt], axis=1, keepdims=True) - np.min(avg_grads[modelt], + axis=1, + keepdims=True), 1e-8) fig, axs = plt.subplots(1, 1, figsize=(width_fig, height_fig)) @@ -1613,10 +1620,10 @@ def plot_npc( """ import matplotlib.pyplot as plt if self.summary_tab is None: - self.create_summary_tab() + self.create_summary_tab() models = np.unique(self.summary_tab["model_type"]).tolist() self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] - + with plt.style.context("seaborn-whitegrid"): plt.figure(figsize=(12, 6)) for model in models: @@ -1635,7 +1642,7 @@ def plot_npc( eig_sum = sum(eig_vals) var_exp = [(i / eig_sum) for i in sorted(eig_vals, reverse=True)] cum_var_exp = np.cumsum([0] + var_exp) - plt.step(range(0, eig_vals.shape[0]+1), cum_var_exp, where="post", linewidth=3, + plt.step(range(0, eig_vals.shape[0] + 1), cum_var_exp, where="post", linewidth=3, label="%s cumulative explained variance (95%%: %s / 99%%: %s)" % (model, np.sum(cum_var_exp < .95), np.sum(cum_var_exp < .99))) plt.yticks([0.0, .25, .50, .75, .95, .99]) plt.ylabel("Explained variance ratio", fontsize=16) @@ -1645,8 +1652,8 @@ def plot_npc( plt.show() def plot_active_latent_units( - self, - organ, + self, + organ, topology_version, 
cvs=None ): @@ -1664,7 +1671,7 @@ def active_latent_units_mask(z): min_var_x = 0.01 active_units_mask = var_x > min_var_x return active_units_mask - + import matplotlib.pyplot as plt if self.summary_tab is None: self.create_summary_tab() @@ -1676,11 +1683,11 @@ def active_latent_units_mask(z): plt.axhline(np.log(0.01), color="k", linestyle='dashed', linewidth=2, label="active unit threshold") for i, model in enumerate(models): model_id, embedding, covar = self.best_model_embedding( - subset={"model_type": model, "organ": organ, "topology": topology_version}, - partition="val", - metric="loss", - cvs=cvs, - ) + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) if len(embedding[0].shape) == 3: z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z else: @@ -1690,7 +1697,7 @@ def active_latent_units_mask(z): log_var = np.log(var) active_units = np.log(var[active_latent_units_mask(z)]) - plt.plot(range(1,log_var.shape[0]+1), log_var, color=colors[i], alpha=1.0, linewidth=3, + plt.plot(range(1, log_var.shape[0] + 1), log_var, color=colors[i], alpha=1.0, linewidth=3, label="%s active units: %i" % (model, len(active_units))) # to plot vertical lines log_var_cut = var.copy() @@ -1698,13 +1705,13 @@ def active_latent_units_mask(z): log_var_cut = np.log(log_var_cut) num_active = np.argmax(log_var_cut) if num_active > 0: - plt.vlines(num_active, ymin = -.15, ymax = 0.15, color=colors[i], linestyle='solid', linewidth=3) + plt.vlines(num_active, ymin=-.15, ymax=0.15, color=colors[i], linestyle='solid', linewidth=3) if model == "vaevamp": - z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))),2) - plt.plot(range(1, int(latent_dim/2)+1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, - label=r"%s $z_2$ active units: %i" % (model, len(z2[z2>np.log(0.01)])), linestyle='dashed', + z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))), 2) + plt.plot(range(1, int(latent_dim / 2) 
+ 1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_2$ active units: %i" % (model, len(z2[z2 > np.log(0.01)])), linestyle='dashed', linewidth=3) - plt.plot(range(1, int(latent_dim/2)+1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, + plt.plot(range(1, int(latent_dim / 2) + 1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, label=r"%s $z_1$ active units: %i" % (model, len(z1[z1 > np.log(0.01)])), linestyle='dotted', linewidth=3) plt.xlabel(r'Latent unit $i$', fontsize=16) diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index c6125d400..45abfb1bb 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,103 +5,103 @@ import pickle from typing import Union -from .external import DatasetGroupBase, DatasetSuperGroup -from .external import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from .external import ModelZoo, ModelZooEmbedding, ModelZooCelltype -from .external import mouse, human -from .external import SPECIES_DICT +from sfaira.data import DatasetGroup, DatasetSuperGroup +from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding +from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.versions.celltype_versions import ORGANISM_DICT class TargetZoos: """ - Class that provides access to all available dataset groups in sfaira. + Class that provides access to all available dataset human in sfaira. 
Parameters ---------- path : str - The name of the animal + Path to the files for this dataset on disk meta_path : str - The sound the animal makes + Path to the meta files for this dataset on disk """ - def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): + def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None): if path is not None: + from sfaira.data.dataloaders.anatomical_groups import mouse, human self.data_mouse = { - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path), - "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path), - "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path), - "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path), - "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path), - 
"stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), - "uterus": mouse.DatasetGroupUterus(path=path) + "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), + "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), + "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), + "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), + "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), + "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path), + "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), + "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), + "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path, cache_path=cache_path), + "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), + "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), + "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path, 
cache_path=cache_path), + "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), + "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), + "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), + "stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), + "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), + "uterus": mouse.DatasetGroupUterus(path=path, cache_path=cache_path), } self.data_human = { - 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path), - 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path), - 'mixed': human.DatasetGroupMixed(path=path, meta_path=meta_path), - 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path), - 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path), - 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path), - 'bone': human.DatasetGroupBone(path=path, meta_path=meta_path), - 'brain': human.DatasetGroupBrain(path=path, meta_path=meta_path), - 'calvaria': human.DatasetGroupCalvaria(path=path, meta_path=meta_path), - 'cervix': human.DatasetGroupCervix(path=path, meta_path=meta_path), - 'chorionicvillus': human.DatasetGroupChorionicvillus(path=path, meta_path=meta_path), - 'colon': human.DatasetGroupColon(path=path, meta_path=meta_path), - 'duodenum': human.DatasetGroupDuodenum(path=path, meta_path=meta_path), - 'epityphlon': human.DatasetGroupEpityphlon(path=path, meta_path=meta_path), - 'esophagus': 
human.DatasetGroupEsophagus(path=path, meta_path=meta_path), - 'eye': human.DatasetGroupEye(path=path, meta_path=meta_path), - 'fallopiantube': human.DatasetGroupFallopiantube(path=path, meta_path=meta_path), - 'femalegonad': human.DatasetGroupFemalegonad(path=path, meta_path=meta_path), - 'gallbladder': human.DatasetGroupGallbladder(path=path, meta_path=meta_path), - 'heart': human.DatasetGroupHeart(path=path, meta_path=meta_path), - 'hesc': human.DatasetGroupHesc(path=path, meta_path=meta_path), - 'ileum': human.DatasetGroupIleum(path=path, meta_path=meta_path), - 'jejunum': human.DatasetGroupJejunum(path=path, meta_path=meta_path), - 'kidney': human.DatasetGroupKidney(path=path, meta_path=meta_path), - 'liver': human.DatasetGroupLiver(path=path, meta_path=meta_path), - 'lung': human.DatasetGroupLung(path=path, meta_path=meta_path), - 'malegonad': human.DatasetGroupMalegonad(path=path, meta_path=meta_path), - 'muscle': human.DatasetGroupMuscle(path=path, meta_path=meta_path), - 'omentum': human.DatasetGroupOmentum(path=path, meta_path=meta_path), - 'pancreas': human.DatasetGroupPancreas(path=path, meta_path=meta_path), - 'placenta': human.DatasetGroupPlacenta(path=path, meta_path=meta_path), - 'pleura': human.DatasetGroupPleura(path=path, meta_path=meta_path), - 'prostate': human.DatasetGroupProstate(path=path, meta_path=meta_path), - 'rectum': human.DatasetGroupRectum(path=path, meta_path=meta_path), - 'rib': human.DatasetGroupRib(path=path, meta_path=meta_path), - 'skin': human.DatasetGroupSkin(path=path, meta_path=meta_path), - 'spinalcord': human.DatasetGroupSpinalcord(path=path, meta_path=meta_path), - 'spleen': human.DatasetGroupSpleen(path=path, meta_path=meta_path), - 'stomach': human.DatasetGroupStomach(path=path, meta_path=meta_path), - 'thymus': human.DatasetGroupThymus(path=path, meta_path=meta_path), - 'thyroid': human.DatasetGroupThyroid(path=path, meta_path=meta_path), - 'trachea': human.DatasetGroupTrachea(path=path, meta_path=meta_path), - 
'ureter': human.DatasetGroupUreter(path=path, meta_path=meta_path), - 'uterus': human.DatasetGroupUterus(path=path, meta_path=meta_path), + 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), + 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path), + 'mixed': human.DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path), + 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path), + 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), + 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), + 'bone': human.DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path), + 'brain': human.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), + 'calvaria': human.DatasetGroupCalvaria(path=path, meta_path=meta_path, cache_path=cache_path), + 'cervix': human.DatasetGroupCervix(path=path, meta_path=meta_path, cache_path=cache_path), + 'chorionicvillus': human.DatasetGroupChorionicvillus(path=path, meta_path=meta_path, cache_path=cache_path), + 'colon': human.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), + 'duodenum': human.DatasetGroupDuodenum(path=path, meta_path=meta_path, cache_path=cache_path), + 'epityphlon': human.DatasetGroupEpityphlon(path=path, meta_path=meta_path, cache_path=cache_path), + 'esophagus': human.DatasetGroupEsophagus(path=path, meta_path=meta_path, cache_path=cache_path), + 'eye': human.DatasetGroupEye(path=path, meta_path=meta_path, cache_path=cache_path), + 'fallopiantube': human.DatasetGroupFallopiantube(path=path, meta_path=meta_path, cache_path=cache_path), + 'femalegonad': human.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + 'gallbladder': human.DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path), + 'heart': 
human.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), + 'hesc': human.DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path), + 'ileum': human.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), + 'jejunum': human.DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path), + 'kidney': human.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), + 'liver': human.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), + 'lung': human.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), + 'malegonad': human.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + 'muscle': human.DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path), + 'omentum': human.DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path), + 'pancreas': human.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), + 'placenta': human.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), + 'pleura': human.DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path), + 'prostate': human.DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path), + 'rectum': human.DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path), + 'rib': human.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), + 'skin': human.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), + 'spinalcord': human.DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path), + 'spleen': human.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), + 'stomach': human.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), + 'thymus': human.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), + 'thyroid': human.DatasetGroupThyroid(path=path, 
meta_path=meta_path, cache_path=cache_path), + 'trachea': human.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), + 'ureter': human.DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path), + 'uterus': human.DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path), } - + else: self.data_human = None self.data_mouse = None @@ -116,8 +116,8 @@ def write_celltypes_tocsv_human(self, fn: str): ds = self.data_human[x] self._write_celltypes_tocsv(fn, x, ds) - def _write_celltypes_tocsv(self, fn: str, x: str, ds: DatasetGroupBase): - ds.load_all(annotated_only=True, remove_gene_version=False, match_to_reference=None) + def _write_celltypes_tocsv(self, fn: str, x: str, ds: DatasetGroup): + ds.load(annotated_only=True, remove_gene_version=False, match_to_reference=None) if len(ds.adata_ls) > 0: obs = ds.obs_concat(keys=["cell_ontology_class", "cell_ontology_id"]) obs.index = range(0, obs.shape[0]) @@ -159,7 +159,7 @@ class TrainModel(TargetZoos): estimator: Union[None, EstimatorKeras] zoo: Union[None, ModelZoo] model_dir: str - data: Union[DatasetGroupBase, DatasetSuperGroup, anndata.AnnData, str, None] + data: Union[DatasetGroup, DatasetSuperGroup, anndata.AnnData, str, None] def __init__(self, data_path: str, meta_path: str): # Check if handling backed anndata or base path to directory of raw files: @@ -187,7 +187,7 @@ def adata(self): raise ValueError("self.data not set yet") elif isinstance(self.data, anndata.AnnData): return self.data - elif isinstance(self.data, DatasetGroupBase) or isinstance(self.data, DatasetSuperGroup): + elif isinstance(self.data, DatasetGroup) or isinstance(self.data, DatasetSuperGroup): return self.data.adata else: raise ValueError("self.data type not recognized: %s " % type(self.data)) @@ -200,7 +200,7 @@ def human_target(self, organ: str): def set_data( self, - data_group: Union[DatasetGroupBase, DatasetSuperGroup] + data_group: Union[DatasetGroup, DatasetSuperGroup] ): """ Set input 
data group. @@ -260,7 +260,7 @@ def init_estim( data=self.adata, model_dir=self.model_dir, model_id=self.zoo.model_id, - species=self.zoo.species, + organism=self.zoo.organism, organ=self.zoo.organ, model_type=self.zoo.model_type, model_topology=self.zoo.model_topology @@ -327,7 +327,7 @@ def init_estim( data=self.adata, model_dir=self.model_dir, model_id=self.zoo.model_id, - species=self.zoo.species, + organism=self.zoo.organism, organ=self.zoo.organ, model_type=self.zoo.model_type, model_topology=self.zoo.model_topology @@ -379,10 +379,10 @@ def _save_specific( cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() - celltype_versions = SPECIES_DICT.copy() - celltype_versions[self.zoo.species][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) - leafnodes = celltype_versions[self.zoo.species][self.zoo.organ].ids - ontology = celltype_versions[self.zoo.species][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] + celltype_versions = ORGANISM_DICT.copy() + celltype_versions[self.zoo.organism][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) + leafnodes = celltype_versions[self.zoo.organism][self.zoo.organ].ids + ontology = celltype_versions[self.zoo.organism][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] for k in cell_counts.keys(): if k not in leafnodes: if k not in ontology.keys(): @@ -390,7 +390,7 @@ def _save_specific( for leaf in ontology[k]: if leaf not in cell_counts_leaf.keys(): cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1/len(ontology[k]) + cell_counts_leaf[leaf] += 1 / len(ontology[k]) del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py new file mode 100644 index 
000000000..5993e7e88 --- /dev/null +++ b/sfaira/unit_tests/test_data_template.py @@ -0,0 +1,49 @@ +import unittest + +from sfaira.data import DatasetGroupDirectoryOriented + + +class TestDatasetTemplate(unittest.TestCase): + dir_data: str = "./test_data" + dir_meta: str = "./test_data/meta" + + def test_load(self): + """ + Address ToDos before running test to customize to your data set. + :return: + """ + celltype_version = None + remove_gene_version = True + match_to_reference = None + # ToDo: add correct module here as "YOUR_STUDY": + from sfaira.data.dataloaders.loaders.YOUR_STUDY import FILE_PATH + ds = DatasetGroupDirectoryOriented( + file_base=FILE_PATH, + path=self.dir_data, + meta_path=self.dir_meta, + cache_path=self.dir_data + ) + # Test raw loading and caching: + ds.load( + celltype_version=celltype_version, + fn=None, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=True, # tests raw loading + allow_caching=True # tests caching + ) + # Test loading from cache: + ds.load( + celltype_version=celltype_version, + fn=None, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=False, + allow_caching=False + ) + # Test concatenation: + _ = ds.adata + + +if __name__ == '__main__': + unittest.main() diff --git a/sfaira/unit_tests/test_dataset.py b/sfaira/unit_tests/test_dataset.py index 9f745322e..763dc89f0 100644 --- a/sfaira/unit_tests/test_dataset.py +++ b/sfaira/unit_tests/test_dataset.py @@ -3,7 +3,8 @@ import scipy.sparse import unittest -from sfaira.data import mouse, DatasetSuperGroup +from sfaira.data import DatasetSuperGroup +from sfaira.data import DatasetSuperGroupSfaira class TestDatasetGroups(unittest.TestCase): @@ -11,11 +12,15 @@ class TestDatasetGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) + ds = DatasetSuperGroupSfaira(path=self.dir_data, 
meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) ds.load_all() def test_adata(self): - ds = mouse.DatasetGroupBladder(path=self.dir_data, meta_path=self.dir_meta) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) _ = ds.adata @@ -24,27 +29,24 @@ class TestDatasetSuperGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all() def test_adata(self): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) _ = ds.adata def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all_tobacked( fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), genome=genome, @@ -55,11 +57,10 @@ def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): assert isinstance(ds.adata.X[:], 
np.ndarray), "%s" % type(ds.adata.X) def test_load_backed_sparse(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all_tobacked( fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), genome=genome, diff --git a/sfaira/unit_tests/test_estimator.py b/sfaira/unit_tests/test_estimator.py index bb9c0a15e..ddb711dad 100644 --- a/sfaira/unit_tests/test_estimator.py +++ b/sfaira/unit_tests/test_estimator.py @@ -21,8 +21,8 @@ class _TestEstimator: """ Contains functions _test* to test individual functions and attributes of estimator class. - - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_call() for an example. 
""" @@ -73,7 +73,7 @@ class TestEstimatorKerasEmbedding(unittest.TestCase, _TestEstimator): def set_topology(self, model_type): self.topology_container = Topologies( - species="mouse", + organism="mouse", model_class="embedding", model_type=model_type, topology_id="0.1" @@ -84,7 +84,7 @@ def init_estimator(self): data=self.data, model_dir=None, model_id=None, - species="mouse", + organism="mouse", organ="lung", model_type=self.topology_container.model_type, model_topology=self.topology_container.topology_id @@ -134,7 +134,7 @@ class TestEstimatorKerasCelltype(unittest.TestCase, _TestEstimator): def set_topology(self, model_type): self.topology_container = Topologies( - species="mouse", + organism="mouse", model_class="celltype", model_type=model_type, topology_id="0.0.1" @@ -145,7 +145,7 @@ def init_estimator(self): data=self.data, model_dir=None, model_id=None, - species="mouse", + organism="mouse", organ="lung", model_type=self.topology_container.model_type, model_topology=self.topology_container.topology_id diff --git a/sfaira/unit_tests/test_userinterface.py b/sfaira/unit_tests/test_userinterface.py index f7b8dbc90..aa99a8ee7 100644 --- a/sfaira/unit_tests/test_userinterface.py +++ b/sfaira/unit_tests/test_userinterface.py @@ -12,8 +12,8 @@ class TestUi(unittest.TestCase): """ Contains functions _test* to test individual functions and attributes of the user interface class. - - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_call() for an example. """ diff --git a/sfaira/unit_tests/test_zoo.py b/sfaira/unit_tests/test_zoo.py index 2fa0b0763..f1f7db52c 100644 --- a/sfaira/unit_tests/test_zoo.py +++ b/sfaira/unit_tests/test_zoo.py @@ -14,8 +14,8 @@ class _TestZoo: """ Contains functions _test* to test individual functions and attributes of estimator class. 
- - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_kipoi_call() for an example. """ @@ -56,7 +56,7 @@ def _test_basic(self, id: str): np.random.seed(1) self.simulate() self.init_zoo() - #self._test_kipoi_call() + # self._test_kipoi_call() self.zoo_manual.set_model_id(id) diff --git a/sfaira/versions/__init__.py b/sfaira/versions/__init__.py index e69de29bb..7840c39b1 100644 --- a/sfaira/versions/__init__.py +++ b/sfaira/versions/__init__.py @@ -0,0 +1,3 @@ +from . import celltype_versions +from . import genome_versions +from . import topology_versions diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py index 3e4990909..68f91677f 100644 --- a/sfaira/versions/celltype_versions/__init__.py +++ b/sfaira/versions/celltype_versions/__init__.py @@ -8,31 +8,31 @@ # Load versions from extension if available: try: - from sfaira_extension.versions.celltype_versions import SPECIES_DICT as SPECIES_DICT_EXTENSION + from sfaira_extension.versions.celltype_versions import ORGANISM_DICT as ORGANISM_DICT_EXTENSION for organ in mouse.keys(): - if organ in SPECIES_DICT_EXTENSION["mouse"].keys(): - for v in SPECIES_DICT_EXTENSION["mouse"][organ].versions: + if organ in ORGANISM_DICT_EXTENSION["mouse"].keys(): + for v in ORGANISM_DICT_EXTENSION["mouse"][organ].versions: if v in mouse[organ].celltype_universe.keys(): raise ValueError(f'Celltype version {v} already defined for mouse organ {organ} in base sfaira. 
' f'Please define a new version in sfaira_extension.') else: - mouse[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["mouse"][organ].celltype_universe[v] - mouse[organ].ontology[v] = SPECIES_DICT_EXTENSION["mouse"][organ].ontology[v] + mouse[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].celltype_universe[v] + mouse[organ].ontology[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].ontology[v] for organ in human.keys(): - if organ in SPECIES_DICT_EXTENSION["human"].keys(): - for v in SPECIES_DICT_EXTENSION["human"][organ].versions: + if organ in ORGANISM_DICT_EXTENSION["human"].keys(): + for v in ORGANISM_DICT_EXTENSION["human"][organ].versions: if v in human[organ].celltype_universe.keys(): - raise ValueError(f'Celltype version {v} already defined for human organ {organ} in base sfaira. ' + raise ValueError(f'Celltype version {v} already defined for loaders organ {organ} in base sfaira. ' f'Please define a new version in sfaira_extension.') else: - human[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["human"][organ].celltype_universe[v] - human[organ].ontology[v] = SPECIES_DICT_EXTENSION["human"][organ].ontology[v] + human[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["human"][organ].celltype_universe[v] + human[organ].ontology[v] = ORGANISM_DICT_EXTENSION["human"][organ].ontology[v] except ImportError: pass -SPECIES_DICT = { +ORGANISM_DICT = { "mouse": mouse, "human": human -} \ No newline at end of file +} diff --git a/sfaira/versions/celltype_versions/base.py b/sfaira/versions/celltype_versions/base.py index 3beeef80c..40b6c4c66 100644 --- a/sfaira/versions/celltype_versions/base.py +++ b/sfaira/versions/celltype_versions/base.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): ) # Check that ontology terms are unique also between ontologies if np.sum([len(x) for x in self.ontology.values()]) != \ - len(np.unique(np.array([list(x) for x in self.ontology.values()]))): + len(np.unique(np.array([list(x) for x in 
self.ontology.values()]))): raise ValueError( "duplicated ontology terms found between ontologies in %s" % type(self) @@ -66,11 +66,10 @@ def set_version( else: raise ValueError("version supplied should be either in format `a.b.c` or `a`") - @property def ids(self): """ - List of all human understandable cell type names of this instance. + List of all loaders understandable cell type names of this instance. :return: """ diff --git a/sfaira/versions/celltype_versions/human/brain.py b/sfaira/versions/celltype_versions/human/brain.py index 4e5bc6144..0bea539e2 100644 --- a/sfaira/versions/celltype_versions/human/brain.py +++ b/sfaira/versions/celltype_versions/human/brain.py @@ -44,7 +44,9 @@ ONTOLOGIES_HUMAN_BRAIN_V0 = { "names": { 'Astrocyte': ['Astrocytes 1', 'Astrocytes 2'], - 'Fetal Neuron': ['Glutamatergic neurons from the PFC 1', 'Glutamatergic neurons from the PFC 2', 'Granule neurons from the hip dentate gyrus region', 'GABAergic interneurons 1', 'GABAergic interneurons 2', 'Pyramidal neurons from the hip CA region 1', 'Pyramidal neurons from the hip CA region 2'] + 'Fetal Neuron': ['Glutamatergic neurons from the PFC 1', 'Glutamatergic neurons from the PFC 2', + 'Granule neurons from the hip dentate gyrus region', 'GABAergic interneurons 1', + 'GABAergic interneurons 2', 'Pyramidal neurons from the hip CA region 1', 'Pyramidal neurons from the hip CA region 2'] }, "ontology_ids": {}, } diff --git a/sfaira/versions/celltype_versions/human/eye.py b/sfaira/versions/celltype_versions/human/eye.py index 26a41e393..66afcbdbd 100644 --- a/sfaira/versions/celltype_versions/human/eye.py +++ b/sfaira/versions/celltype_versions/human/eye.py @@ -50,7 +50,7 @@ ONTOLOGIES_HUMAN_EYE_V0 = { "names": { 'BPs': ['Retinal bipolar neuron type A', 'Retinal bipolar neuron type B', 'Retinal bipolar neuron type C', 'Retinal bipolar neuron type D'], - 'Rods': ['Retinal rod cell type A', 'Retinal rod cell type B', 'Retinal rod cell type C',] + 'Rods': ['Retinal rod cell type A', 
'Retinal rod cell type B', 'Retinal rod cell type C', ] }, "ontology_ids": {}, } @@ -65,4 +65,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_EYE_V0 } - super(CelltypeVersionsHumanEye, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanEye, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/kidney.py b/sfaira/versions/celltype_versions/human/kidney.py index f05b0c3fd..c1ba3b0ad 100644 --- a/sfaira/versions/celltype_versions/human/kidney.py +++ b/sfaira/versions/celltype_versions/human/kidney.py @@ -111,7 +111,7 @@ ONTOLOGIES_HUMAN_KIDNEY_V0 = { "names": { 'Type A intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', - 'Collecting Duct - Intercalated Cells Type A (medulla)'], + 'Collecting Duct - Intercalated Cells Type A (medulla)'], 'Principal cell': ['Collecting Duct - PCs - Stressed Dissoc Subset', 'Collecting Duct - Principal Cells (cortex)', 'Collecting Duct - Principal Cells (medulla)'], @@ -121,9 +121,13 @@ 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], 'Dendritic cell': ['MNP-c/dendritic cell', 'Plasmacytoid dendritic cell'], - 'Endothelial cell': ['Endothelial Cells (unassigned)', 'Endothelial Cells - AEA & DVR', 'Endothelial Cells - AVR', 'Endothelial Cells - glomerular capillaries', 'Peritubular capillary endothelium 1', 'Peritubular capillary endothelium 2'], - 'Epithelial cell': ['Pelvic epithelium', 'Pelvic epithelium - distal UB', 'Proximal Tubule Epithelial Cells (S1)', 'Proximal Tubule Epithelial Cells (S2)', 'Proximal Tubule Epithelial Cells (S3)', 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], - 'Intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', 'Collecting Duct - Intercalated Cells Type A (medulla)', 'Collecting Duct - Intercalated Cells Type B', 'Indistinct intercalated cell'], + 'Endothelial 
cell': ['Endothelial Cells (unassigned)', 'Endothelial Cells - AEA & DVR', 'Endothelial Cells - AVR', + 'Endothelial Cells - glomerular capillaries', 'Peritubular capillary endothelium 1', 'Peritubular capillary endothelium 2'], + 'Epithelial cell': ['Pelvic epithelium', 'Pelvic epithelium - distal UB', 'Proximal Tubule Epithelial Cells (S1)', + 'Proximal Tubule Epithelial Cells (S2)', 'Proximal Tubule Epithelial Cells (S3)', + 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], + 'Intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', 'Collecting Duct - Intercalated Cells Type A (medulla)', + 'Collecting Duct - Intercalated Cells Type B', 'Indistinct intercalated cell'], 'T cell': ['CD4 T cell', 'CD8 T cell'], 'Ureteric bud cell': ['CNT/PC - proximal UB', 'Proximal UB', 'Pelvic epithelium - distal UB'] }, diff --git a/sfaira/versions/celltype_versions/human/liver.py b/sfaira/versions/celltype_versions/human/liver.py index 6120a740f..d294a31cd 100644 --- a/sfaira/versions/celltype_versions/human/liver.py +++ b/sfaira/versions/celltype_versions/human/liver.py @@ -72,8 +72,8 @@ "names": { 'Erythroid cells': ['Early Erythroid', 'Mid Erythroid', 'Late Erythroid'], 'Endothelial cell': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], - 'Hepatocyte': ['Hepatocyte 1','Hepatocyte 2','Hepatocyte 3','Hepatocyte 4','Hepatocyte 5','Hepatocyte 6'], - 'Hepatocytes': ['Hepatocyte 1','Hepatocyte 2','Hepatocyte 3','Hepatocyte 4','Hepatocyte 5','Hepatocyte 6'], + 'Hepatocyte': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], + 'Hepatocytes': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], 'Endothelia': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], 'Bcells': ['pro B cell', 'Pre pro B cell', 'Mature B 
cells', 'pre B cell', 'Plasma B cell'], 'Tcells': ['Gamma delta T cells 2', 'Gamma delta T cells 1', 'Alpha beta T cells'], diff --git a/sfaira/versions/celltype_versions/human/mixed.py b/sfaira/versions/celltype_versions/human/mixed.py index bd2d91e3a..02922bae0 100644 --- a/sfaira/versions/celltype_versions/human/mixed.py +++ b/sfaira/versions/celltype_versions/human/mixed.py @@ -29,6 +29,7 @@ "ontology_ids": {}, } + class CelltypeVersionsHumanMixed(CelltypeVersionsBase): def __init__(self, **kwargs): diff --git a/sfaira/versions/celltype_versions/human/rectum.py b/sfaira/versions/celltype_versions/human/rectum.py index 6741afe2f..80d30a0a8 100644 --- a/sfaira/versions/celltype_versions/human/rectum.py +++ b/sfaira/versions/celltype_versions/human/rectum.py @@ -38,4 +38,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_RECTUM_V0 } - super(CelltypeVersionsHumanRectum, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanRectum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/skin.py b/sfaira/versions/celltype_versions/human/skin.py index 6e391ec50..160003138 100644 --- a/sfaira/versions/celltype_versions/human/skin.py +++ b/sfaira/versions/celltype_versions/human/skin.py @@ -41,7 +41,8 @@ ] ONTOLOGIES_HUMAN_SKIN_V0 = { "names": { - 'immune': ['B cell', 'T cell', 'Dendritic cell', 'Erythroid cell', 'Erythroid progenitor cell (RP high)', 'Macrophage', 'Mast cell', 'Monocyte', 'Neutrophil', 'Neutrophil (RPS high)', 'Proliferating T cell'], + 'immune': ['B cell', 'T cell', 'Dendritic cell', 'Erythroid cell', 'Erythroid progenitor cell (RP high)', 'Macrophage', + 'Mast cell', 'Monocyte', 'Neutrophil', 'Neutrophil (RPS high)', 'Proliferating T cell'], 'Basal cell': ['Basal cell 1', 'Basal cell 2'] }, "ontology_ids": {}, @@ -57,4 +58,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_SKIN_V0 } - super(CelltypeVersionsHumanSkin, self).__init__(**kwargs) \ No newline 
at end of file + super(CelltypeVersionsHumanSkin, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/spleen.py b/sfaira/versions/celltype_versions/human/spleen.py index 2c1d0bfa3..27cca5500 100644 --- a/sfaira/versions/celltype_versions/human/spleen.py +++ b/sfaira/versions/celltype_versions/human/spleen.py @@ -66,4 +66,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_SPLEEN_V0 } - super(CelltypeVersionsHumanSpleen, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanSpleen, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/adipose.py b/sfaira/versions/celltype_versions/mouse/adipose.py index 5b390523d..1b82c908c 100644 --- a/sfaira/versions/celltype_versions/mouse/adipose.py +++ b/sfaira/versions/celltype_versions/mouse/adipose.py @@ -17,7 +17,7 @@ ONTOLOGIES_MOUSE_ADIPOSE_V0 = { "names": { "lymphocyte": [ - "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", + "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "myeloid cell", "NK cell" ], "T cell": ["CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell"] diff --git a/sfaira/versions/celltype_versions/mouse/bladder.py b/sfaira/versions/celltype_versions/mouse/bladder.py index 3c2215ec7..995dc6e3d 100644 --- a/sfaira/versions/celltype_versions/mouse/bladder.py +++ b/sfaira/versions/celltype_versions/mouse/bladder.py @@ -18,7 +18,7 @@ ONTOLOGIES_MOUSE_BLADDER_V0 = { "names": { "bladder cell": ["basal epithelial cell", "epithelial cell", "mesenchymal stromal cell", "smooth muscle cell", - "stromal cell", "umbrella cell"], + "stromal cell", "umbrella cell"], "leukocyte": ["dendritic cell", "macrophage", "NK cell"] }, "ontology_ids": {}, diff --git a/sfaira/versions/celltype_versions/mouse/kidney.py b/sfaira/versions/celltype_versions/mouse/kidney.py index 5866bb19f..9fe0c66c2 100644 --- a/sfaira/versions/celltype_versions/mouse/kidney.py 
+++ b/sfaira/versions/celltype_versions/mouse/kidney.py @@ -47,7 +47,7 @@ ], "lymphocyte": ["B cell", "dendritic cell", "macrophage", "NK cell", "T cell"], "leukocyte": [ - "B cell", "dendritic cell", "macrophage", "neutrophil progenitor", + "B cell", "dendritic cell", "macrophage", "neutrophil progenitor", "NK cell", "plasma cell", "T cell" ], }, diff --git a/sfaira/versions/celltype_versions/mouse/pancreas.py b/sfaira/versions/celltype_versions/mouse/pancreas.py index a89f3ce78..b367cfacd 100644 --- a/sfaira/versions/celltype_versions/mouse/pancreas.py +++ b/sfaira/versions/celltype_versions/mouse/pancreas.py @@ -31,7 +31,7 @@ "macrophage", "t cell" ], - "endocrine cell": [ + "endocrine cell": [ "pancreatic A cell", "pancreatic D cell", "pancreatic PP cell" diff --git a/sfaira/versions/celltype_versions/mouse/spleen.py b/sfaira/versions/celltype_versions/mouse/spleen.py index f5204f45a..67a2e21df 100644 --- a/sfaira/versions/celltype_versions/mouse/spleen.py +++ b/sfaira/versions/celltype_versions/mouse/spleen.py @@ -23,7 +23,7 @@ ONTOLOGIES_MOUSE_SPLEEN_V0 = { "names": { "T cell": [ - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", + "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "immature NKT cell", "mature NK T cell" ] }, diff --git a/sfaira/versions/genome_versions/class_interface.py b/sfaira/versions/genome_versions/class_interface.py index 9b28a5994..ffc44de77 100644 --- a/sfaira/versions/genome_versions/class_interface.py +++ b/sfaira/versions/genome_versions/class_interface.py @@ -5,15 +5,15 @@ class SuperGenomeContainer: _cache_tab: pandas.DataFrame genome: str - species: str + organism: str def __init__( self, - species: str, + organism: str, genome: str ): - self.species = species - if self.species == "human": + self.organism = organism + if self.organism == "human": try: from sfaira_extension.versions.genome_versions.human import GenomeContainer if genome not in GenomeContainer.available_genomes: @@ -24,7 
+24,7 @@ def __init__( from .human import GenomeContainer if genome not in GenomeContainer.available_genomes: raise ValueError(f"Genome {genome} not recognised.") - elif self.species == "mouse": + elif self.organism == "mouse": try: from sfaira_extension.versions.genome_versions.mouse import GenomeContainer if genome not in GenomeContainer.available_genomes: @@ -36,7 +36,7 @@ def __init__( if genome not in GenomeContainer.available_genomes: raise ValueError(f"Genome {genome} not recognised.") else: - raise ValueError(f"Species {species} not recognised.") + raise ValueError(f"Organism {organism} not recognised.") self.gc = GenomeContainer() self.set_genome(genome=genome) diff --git a/sfaira/versions/genome_versions/human/genome_sizes.py b/sfaira/versions/genome_versions/human/genome_sizes.py index db7d7fda7..5d898aeaa 100644 --- a/sfaira/versions/genome_versions/human/genome_sizes.py +++ b/sfaira/versions/genome_versions/human/genome_sizes.py @@ -1,3 +1,3 @@ GENOME_SIZE_DICT = { "Homo_sapiens_GRCh38_97": (19986, ) -} \ No newline at end of file +} diff --git a/sfaira/versions/genome_versions/mouse/genome_container.py b/sfaira/versions/genome_versions/mouse/genome_container.py index ce9d047ab..13c341234 100644 --- a/sfaira/versions/genome_versions/mouse/genome_container.py +++ b/sfaira/versions/genome_versions/mouse/genome_container.py @@ -16,4 +16,4 @@ def __init__(self): } def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) \ No newline at end of file + return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/mouse/genome_sizes.py b/sfaira/versions/genome_versions/mouse/genome_sizes.py index 8d1c9c3e7..63cf95ff0 100644 --- a/sfaira/versions/genome_versions/mouse/genome_sizes.py +++ b/sfaira/versions/genome_versions/mouse/genome_sizes.py @@ -1,3 +1,3 @@ GENOME_SIZE_DICT = { "Mus_musculus_GRCm38_97": (21900, ) -} \ 
No newline at end of file +} diff --git a/sfaira/versions/topology_versions/class_interface.py b/sfaira/versions/topology_versions/class_interface.py index e27b80847..0aa186c53 100644 --- a/sfaira/versions/topology_versions/class_interface.py +++ b/sfaira/versions/topology_versions/class_interface.py @@ -8,7 +8,7 @@ class Topologies: def __init__( self, - species: str, + organism: str, model_class: str, model_type: str, topology_id: str @@ -43,27 +43,27 @@ def __init__( } } } - self.species = species + self.organism = organism self.model_class = model_class self.model_type = model_type self.topology_id = topology_id - assert species in list(self.topologies.keys()), \ - "species %s not found in %s" % \ - (species, list(self.topologies.keys())) - assert model_class in list(self.topologies[species].keys()), \ + assert organism in list(self.topologies.keys()), \ + "organism %s not found in %s" % \ + (organism, list(self.topologies.keys())) + assert model_class in list(self.topologies[organism].keys()), \ "model_class %s not found in %s" % \ - (model_type, list(self.topologies[species].keys())) - assert model_type in list(self.topologies[species][model_class].keys()), \ + (model_type, list(self.topologies[organism].keys())) + assert model_type in list(self.topologies[organism][model_class].keys()), \ "model_type %s not found in %s" % \ - (model_type, list(self.topologies[species][model_class].keys())) - assert topology_id in list(self.topologies[species][model_class][model_type].keys()), \ + (model_type, list(self.topologies[organism][model_class].keys())) + assert topology_id in list(self.topologies[organism][model_class][model_type].keys()), \ "topology_id %s not found in %s" % \ - (topology_id, list(self.topologies[species][model_class][model_type].keys())) - self.genome_container = SuperGenomeContainer(species=species, genome=self.topology["genome"]) + (topology_id, list(self.topologies[organism][model_class][model_type].keys())) + self.genome_container = 
SuperGenomeContainer(organism=organism, genome=self.topology["genome"]) @property def topology(self): - return self.topologies[self.species][self.model_class][self.model_type][self.topology_id] + return self.topologies[self.organism][self.model_class][self.model_type][self.topology_id] @property def ngenes(self): diff --git a/sfaira/versions/topology_versions/external.py b/sfaira/versions/topology_versions/external.py index 93bcbab8e..86fafa27f 100644 --- a/sfaira/versions/topology_versions/external.py +++ b/sfaira/versions/topology_versions/external.py @@ -1 +1 @@ -from sfaira.versions.genome_versions import SuperGenomeContainer \ No newline at end of file +from sfaira.versions.genome_versions import SuperGenomeContainer # noqa: W292 diff --git a/sfaira/versions/topology_versions/human/embedding/ae.py b/sfaira/versions/topology_versions/human/embedding/ae.py index 225100769..fd449a8c1 100644 --- a/sfaira/versions/topology_versions/human/embedding/ae.py +++ b/sfaira/versions/topology_versions/human/embedding/ae.py @@ -2,60 +2,60 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + 
"activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/linear.py b/sfaira/versions/topology_versions/human/embedding/linear.py index 80f9edeca..ef1bc2c53 100644 --- a/sfaira/versions/topology_versions/human/embedding/linear.py +++ b/sfaira/versions/topology_versions/human/embedding/linear.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 
128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/nmf.py b/sfaira/versions/topology_versions/human/embedding/nmf.py index d006be9cb..7ab548d78 100644 --- a/sfaira/versions/topology_versions/human/embedding/nmf.py +++ b/sfaira/versions/topology_versions/human/embedding/nmf.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/vae.py b/sfaira/versions/topology_versions/human/embedding/vae.py index 535a907c8..8ba9d4199 100644 --- a/sfaira/versions/topology_versions/human/embedding/vae.py +++ b/sfaira/versions/topology_versions/human/embedding/vae.py @@ -2,56 +2,56 @@ "0.1": { "genome": 
"Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py 
b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py index 0602ac457..5ad4cf9ea 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py @@ -2,29 +2,29 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/ae.py b/sfaira/versions/topology_versions/mouse/embedding/ae.py index 4c628642a..12b092138 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/ae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/ae.py @@ -2,60 +2,60 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + 
"activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/linear.py b/sfaira/versions/topology_versions/mouse/embedding/linear.py index cd07f0366..f073b42a2 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/linear.py +++ b/sfaira/versions/topology_versions/mouse/embedding/linear.py @@ -2,33 
+2,33 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/nmf.py b/sfaira/versions/topology_versions/mouse/embedding/nmf.py index 65b2b44a3..9283ae40f 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/nmf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/nmf.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - 
"l2_coef": 0., - "positive_components": True, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/vae.py b/sfaira/versions/topology_versions/mouse/embedding/vae.py index 49b45b01f..aaeab8e76 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vae.py @@ -2,56 +2,56 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": 
{ - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py index d6dd458b2..28989d580 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py @@ -2,29 +2,29 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } } } @@ -37,4 +37,4 @@ **VAEIAF_TOPOLOGIES_EXTENSION } except ImportError: - pass \ No newline at end of file + pass diff --git a/versioneer.py b/versioneer.py index 8c2ece54e..d52552de5 100644 --- a/versioneer.py +++ 
b/versioneer.py @@ -561,15 +561,15 @@ def git_get_keywords(versionfile_abs): f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["date"] = mo.group(1) f.close()