diff --git a/_viash.yaml b/_viash.yaml index 4c0572bb..1df7d655 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,9 +1,51 @@ name: task_perturbation_prediction -version: 1.1.0 - -# package metadata +label: Perturbation Prediction +summary: Predicting how small molecules change gene expression in different cell types. description: | - Predicting how small molecules change gene expression in different cell types. + Human biology can be complex, in part due to the function and interplay of the body's + approximately 37 trillion cells, which are organized into tissues, organs, and systems. + However, recent advances in single-cell technologies have provided unparalleled insight + into the function of cells and tissues at the level of DNA, RNA, and proteins. Yet + leveraging single-cell methods to develop medicines requires mapping causal links + between chemical perturbations and the downstream impact on cell state. These experiments + are costly and labor intensive, and not all cells and tissues are amenable to + high-throughput transcriptomic screening. If data science could help accurately predict + chemical perturbations in new cell types, it could accelerate and expand the development + of new medicines. + + Several methods have been developed for drug perturbation prediction, most of which are + variations on the autoencoder architecture (Dr.VAE, scGEN, and ChemCPA). However, these + methods lack proper benchmarking datasets with diverse cell types to determine how well + they generalize. The largest available training dataset is the NIH-funded Connectivity + Map (CMap), which comprises over 1.3M small molecule perturbation measurements. However, + the CMap includes observations of only 978 genes, less than 5% of all genes. Furthermore, + the CMap data is comprised almost entirely of measurements in cancer cell lines, which + may not accurately represent human biology. 
+ + This task aims to predict how small molecules change gene expression in different cell + types. This task was a [Kaggle competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview) + as part of the [NeurIPS 2023 competition track](https://neurips.cc/virtual/2023/competition/66586). + + The task is to predict the gene expression profile of a cell after a small molecule + perturbation. For this competition, we designed and generated a novel single-cell + perturbational dataset in human peripheral blood mononuclear cells (PBMCs). We + selected 144 compounds from the Library of Integrated Network-Based Cellular Signatures + (LINCS) Connectivity Map dataset ([PMID: 29195078](https://pubmed.ncbi.nlm.nih.gov/29195078/)) + and measured single-cell gene + expression profiles after 24 hours of treatment. The experiment was repeated in three + healthy human donors, and the compounds were selected based on diverse transcriptional + signatures observed in CD34+ hematopoietic stem cells (data not released). We performed + this experiment in human PBMCs because the cells are commercially available with + pre-obtained consent for public release and PBMCs are a primary, disease-relevant tissue + that contains multiple mature cell types (including T-cells, B-cells, myeloid cells, + and NK cells) with established markers for annotation of cell types. To supplement this + dataset, we also measured cells from each donor at baseline with joint scRNA and + single-cell chromatin accessibility measurements using the 10x Multiome assay. We hope + that the addition of rich multi-omic data for each donor and cell type at baseline will + help establish biological priors that explain the susceptibility of particular genes to + exhibit perturbation responses in different biological contexts. 
+ +version: dev license: MIT keywords: [single-cell, perturbation prediction, perturbation, openproblems, benchmark] links: @@ -11,6 +53,59 @@ links: repository: https://github.com/openproblems-bio/task_perturbation_prediction docker_registry: ghcr.io +authors: + - name: Artur Szałata + roles: [ author ] + info: + github: szalata + orcid: "000-0001-8413-234X" + - name: Robrecht Cannoodt + roles: [ author ] + info: + github: rcannood + orcid: "0000-0003-3641-729X" + - name: Daniel Burkhardt + roles: [ author ] + info: + github: dburkhardt + orcid: 0000-0001-7744-1363 + - name: Malte D. Luecken + roles: [ author ] + info: + github: LuckyMD + orcid: 0000-0001-7464-7921 + - name: Tin M. Tunjic + roles: [ contributor ] + info: + github: ttunja + orcid: 0000-0001-8842-6548 + - name: Mengbo Wang + roles: [ contributor ] + info: + github: wangmengbo + orcid: 0000-0002-0266-9993 + - name: Andrew Benz + roles: [ author ] + info: + github: andrew-benz + orcid: 0009-0002-8118-1861 + - name: Tianyu Liu + roles: [ contributor ] + info: + github: HelloWorldLTY + orcid: 0000-0002-9412-6573 + - name: Jalil Nourisa + roles: [ contributor ] + info: + github: janursa + orcid: 0000-0002-7539-4396 + - name: Rico Meinl + roles: [ contributor ] + info: + github: ricomnl + orcid: 0000-0003-4356-6058 + + # technical settings organization: openproblems-bio viash_version: 0.9.0 @@ -22,4 +117,4 @@ info: # set default labels config_mods: | - .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } \ No newline at end of file + .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : 
"time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } diff --git a/src/api/file_de_test.yaml b/src/api/file_de_test.yaml index d31e047f..33d2b615 100644 --- a/src/api/file_de_test.yaml +++ b/src/api/file_de_test.yaml @@ -1,10 +1,10 @@ type: file example: resources/datasets/neurips-2023-data/de_test.h5ad +label: DE test +summary: "Differential expression results for testing." info: - label: DE test - summary: "Differential expression results for testing." - file_type: h5ad - slots: + format: + type: h5ad obs: - name: cell_type type: string diff --git a/src/api/file_de_train.yaml b/src/api/file_de_train.yaml index a7ff239b..cb5636b6 100644 --- a/src/api/file_de_train.yaml +++ b/src/api/file_de_train.yaml @@ -1,10 +1,10 @@ type: file example: resources/datasets/neurips-2023-data/de_train.h5ad +label: DE train +summary: "Differential expression results for training." info: - label: DE train - summary: "Differential expression results for training." - file_type: h5ad - slots: + format: + type: h5ad obs: - name: cell_type type: string diff --git a/src/api/file_id_map.yaml b/src/api/file_id_map.yaml index a1551abe..63a8310b 100644 --- a/src/api/file_id_map.yaml +++ b/src/api/file_id_map.yaml @@ -1,19 +1,20 @@ type: file example: resources/datasets/neurips-2023-data/id_map.csv +label: ID Map +summary: "File indicates the order of de_test, the cell types and the small molecule names." info: - label: ID Map - summary: "File indicates the order of de_test, the cell types and the small molecule names." 
- file_type: csv - columns: - - name: id - type: integer - description: Index of the test observation - required: true - - name: cell_type - type: string - description: "Cell type name" - required: true - - name: sm_name - type: string - description: "Small molecule name" - required: true + format: + type: csv + columns: + - name: id + type: integer + description: Index of the test observation + required: true + - name: cell_type + type: string + description: "Cell type name" + required: true + - name: sm_name + type: string + description: "Small molecule name" + required: true diff --git a/src/api/file_model.yaml b/src/api/file_model.yaml index b7e9a988..d4c47277 100644 --- a/src/api/file_model.yaml +++ b/src/api/file_model.yaml @@ -1,6 +1,4 @@ type: file example: resources/datasets/neurips-2023-data/model/ -info: - label: Model - summary: "Optional model output. If no value is passed, the model will be removed at the end of the run." - file_type: directory \ No newline at end of file +label: Model +summary: "Optional model output. If no value is passed, the model will be removed at the end of the run." diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index 80b279b6..eda23ceb 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,10 +1,10 @@ type: file example: resources/datasets/neurips-2023-data/prediction.h5ad +label: Prediction +summary: "Differential Gene Expression prediction" info: - label: Prediction - summary: "Differential Gene Expression prediction" - file_type: h5ad - slots: + format: + type: h5ad layers: - name: prediction type: double diff --git a/src/api/file_sc_counts.yaml b/src/api/file_sc_counts.yaml index 493cc6a2..a760f008 100644 --- a/src/api/file_sc_counts.yaml +++ b/src/api/file_sc_counts.yaml @@ -1,10 +1,10 @@ type: file example: resources/neurips-2023-raw/sc_counts.h5ad +label: Single Cell Counts +summary: "Anndata with the counts of the whole dataset." 
info: - label: Single Cell Counts - summary: "Anndata with the counts of the whole dataset." - file_type: h5ad - slots: + format: + type: h5ad obs: - name: dose_uM description: "Dose in micromolar." diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 3ec5c36a..258aa689 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,10 +1,10 @@ type: file example: resources/datasets/neurips-2023-data/score.h5ad +label: Score +summary: "File indicating the score of a metric." info: - label: Score - summary: "File indicating the score of a metric." - file_type: h5ad - slots: + format: + type: h5ad uns: - type: string name: dataset_id diff --git a/src/api/task_info.yaml b/src/api/task_info.yaml deleted file mode 100644 index c0dfb93f..00000000 --- a/src/api/task_info.yaml +++ /dev/null @@ -1,141 +0,0 @@ -name: perturbation_prediction -label: Perturbation Prediction -summary: Predicting how small molecules change gene expression in different cell types. -readme: | - ## Installation - - You need to have Docker, Java, and Viash installed. Follow - [these instructions](https://openproblems.bio/documentation/fundamentals/requirements) - to install the required dependencies. - - ## Add a method - - To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script. - - ## Frequently used commands - - To get started, you can run the following commands: - - ```bash - git clone git@github.com:openproblems-bio/task_perturbation_prediction.git - - cd task_perturbation_prediction - - # download resources - scripts/download_resources.sh - ``` - - To run the benchmark, you first need to build the components. 
Afterwards, you can run the benchmark: - - ```bash - viash ns build --parallel --setup cachedbuild - - scripts/run_benchmark.sh - ``` - - After adding a component, it is recommended to run the tests to ensure that the component is working correctly: - - ```bash - viash ns test --parallel - ``` - - Optionally, you can provide the `--query` argument to test only a subset of components: - - ```bash - viash ns test --parallel --query "component_name" - ``` -motivation: | - Human biology can be complex, in part due to the function and interplay of the body's - approximately 37 trillion cells, which are organized into tissues, organs, and systems. - However, recent advances in single-cell technologies have provided unparalleled insight - into the function of cells and tissues at the level of DNA, RNA, and proteins. Yet - leveraging single-cell methods to develop medicines requires mapping causal links - between chemical perturbations and the downstream impact on cell state. These experiments - are costly and labor intensive, and not all cells and tissues are amenable to - high-throughput transcriptomic screening. If data science could help accurately predict - chemical perturbations in new cell types, it could accelerate and expand the development - of new medicines. - - Several methods have been developed for drug perturbation prediction, most of which are - variations on the autoencoder architecture (Dr.VAE, scGEN, and ChemCPA). However, these - methods lack proper benchmarking datasets with diverse cell types to determine how well - they generalize. The largest available training dataset is the NIH-funded Connectivity - Map (CMap), which comprises over 1.3M small molecule perturbation measurements. However, - the CMap includes observations of only 978 genes, less than 5% of all genes. Furthermore, - the CMap data is comprised almost entirely of measurements in cancer cell lines, which - may not accurately represent human biology. 
-description: | - This task aims to predict how small molecules change gene expression in different cell - types. This task was a [Kaggle competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview) - as part of the [NeurIPS 2023 competition track](https://neurips.cc/virtual/2023/competition/66586). - - The task is to predict the gene expression profile of a cell after a small molecule - perturbation. For this competition, we designed and generated a novel single-cell - perturbational dataset in human peripheral blood mononuclear cells (PBMCs). We - selected 144 compounds from the Library of Integrated Network-Based Cellular Signatures - (LINCS) Connectivity Map dataset ([PMID: 29195078](https://pubmed.ncbi.nlm.nih.gov/29195078/)) - and measured single-cell gene - expression profiles after 24 hours of treatment. The experiment was repeated in three - healthy human donors, and the compounds were selected based on diverse transcriptional - signatures observed in CD34+ hematopoietic stem cells (data not released). We performed - this experiment in human PBMCs because the cells are commercially available with - pre-obtained consent for public release and PBMCs are a primary, disease-relevant tissue - that contains multiple mature cell types (including T-cells, B-cells, myeloid cells, - and NK cells) with established markers for annotation of cell types. To supplement this - dataset, we also measured cells from each donor at baseline with joint scRNA and - single-cell chromatin accessibility measurements using the 10x Multiome assay. We hope - that the addition of rich multi-omic data for each donor and cell type at baseline will - help establish biological priors that explain the susceptibility of particular genes to - exhibit perturbation responses in difference biological contexts. 
- -authors: - - name: Artur Szałata - roles: [ author ] - info: - github: szalata - orcid: "000-0001-8413-234X" - - name: Robrecht Cannoodt - roles: [ author ] - info: - github: rcannood - orcid: "0000-0003-3641-729X" - - name: Daniel Burkhardt - roles: [ author ] - info: - github: dburkhardt - orcid: 0000-0001-7744-1363 - - name: Malte D. Luecken - roles: [ author ] - info: - github: LuckyMD - orcid: 0000-0001-7464-7921 - - name: Tin M. Tunjic - roles: [ contributor ] - info: - github: ttunja - orcid: 0000-0001-8842-6548 - - name: Mengbo Wang - roles: [ contributor ] - info: - github: wangmengbo - orcid: 0000-0002-0266-9993 - - name: Andrew Benz - roles: [ author ] - info: - github: andrew-benz - orcid: 0009-0002-8118-1861 - - name: Tianyu Liu - roles: [ contributor ] - info: - github: HelloWorldLTY - orcid: 0000-0002-9412-6573 - - name: Jalil Nourisa - roles: [ contributor ] - info: - github: janursa - orcid: 0000-0002-7539-4396 - - name: Rico Meinl - roles: [ contributor ] - info: - github: ricomnl - orcid: 0000-0003-4356-6058 diff --git a/src/control_methods/ground_truth/config.vsh.yaml b/src/control_methods/ground_truth/config.vsh.yaml index 1d229eb7..de91a874 100644 --- a/src/control_methods/ground_truth/config.vsh.yaml +++ b/src/control_methods/ground_truth/config.vsh.yaml @@ -1,17 +1,17 @@ __merge__: ../../api/comp_control_method.yaml name: ground_truth +label: Ground truth +summary: "Returns the ground truth predictions." +description: | + The identity function that returns the ground-truth information as the output. info: - label: Ground truth - summary: "Returns the ground truth predictions." - description: | - The identity function that returns the ground-truth information as the output. 
preferred_normalization: counts resources: - type: r_script path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r cran: [ arrow, dplyr ] diff --git a/src/control_methods/mean_across_celltypes/config.vsh.yaml b/src/control_methods/mean_across_celltypes/config.vsh.yaml index 8b64b3f0..a8a8661a 100644 --- a/src/control_methods/mean_across_celltypes/config.vsh.yaml +++ b/src/control_methods/mean_across_celltypes/config.vsh.yaml @@ -1,17 +1,16 @@ __merge__: ../../api/comp_control_method.yaml name: mean_across_celltypes -info: - label: Mean per cell type and gene - summary: Baseline method that returns mean of cell type's outcomes - description: | - Baseline method that predicts for a cell type the mean of its outcomes of all compounds. +label: Mean per cell type and gene +summary: Baseline method that returns mean of cell type's outcomes +description: | + Baseline method that predicts for a cell type the mean of its outcomes of all compounds. resources: - type: python_script path: script.py - path: ../../utils/anndata_to_dataframe.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet ] diff --git a/src/control_methods/mean_across_compounds/config.vsh.yaml b/src/control_methods/mean_across_compounds/config.vsh.yaml index c84a9a03..1f6a2481 100644 --- a/src/control_methods/mean_across_compounds/config.vsh.yaml +++ b/src/control_methods/mean_across_compounds/config.vsh.yaml @@ -1,17 +1,16 @@ __merge__: ../../api/comp_control_method.yaml name: mean_across_compounds -info: - label: Mean per compound and gene - summary: Baseline method that returns mean of compound's outcomes - description: | - Baseline method that predicts for a compound the mean of its outcomes of all samples. 
+label: Mean per compound and gene +summary: Baseline method that returns mean of compound's outcomes +description: | + Baseline method that predicts for a compound the mean of its outcomes of all samples. resources: - type: python_script path: script.py - path: ../../utils/anndata_to_dataframe.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet ] diff --git a/src/control_methods/mean_outcome/config.vsh.yaml b/src/control_methods/mean_outcome/config.vsh.yaml index 13032c86..7d1ccc6d 100644 --- a/src/control_methods/mean_outcome/config.vsh.yaml +++ b/src/control_methods/mean_outcome/config.vsh.yaml @@ -1,17 +1,16 @@ __merge__: ../../api/comp_control_method.yaml name: mean_outcome -info: - label: Mean per gene - summary: Baseline method that returns mean of gene's outcomes - description: | - Baseline method that predicts for a gene the mean of its outcomes of all samples. +label: Mean per gene +summary: Baseline method that returns mean of gene's outcomes +description: | + Baseline method that predicts for a gene the mean of its outcomes of all samples. resources: - type: python_script path: script.py - path: ../../utils/anndata_to_dataframe.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet ] diff --git a/src/control_methods/sample/config.vsh.yaml b/src/control_methods/sample/config.vsh.yaml index dd02c62e..b2c34689 100644 --- a/src/control_methods/sample/config.vsh.yaml +++ b/src/control_methods/sample/config.vsh.yaml @@ -1,17 +1,17 @@ __merge__: ../../api/comp_control_method.yaml name: sample +label: Sample +summary: Sample predictions from the training data +description: | + This method samples the training data to generate predictions. 
info: - label: Sample - summary: Sample predictions from the training data - description: | - This method samples the training data to generate predictions. preferred_normalization: counts resources: - type: r_script path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r cran: [ arrow, dplyr ] diff --git a/src/control_methods/zeros/config.vsh.yaml b/src/control_methods/zeros/config.vsh.yaml index 186f3689..b456a925 100644 --- a/src/control_methods/zeros/config.vsh.yaml +++ b/src/control_methods/zeros/config.vsh.yaml @@ -1,16 +1,15 @@ __merge__: ../../api/comp_control_method.yaml name: zeros -info: - label: Zeros - summary: Baseline method that predicts all zeros - description: | - Baseline method that predicts all zeros. +label: Zeros +summary: Baseline method that predicts all zeros +description: | + Baseline method that predicts all zeros. resources: - type: python_script path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet ] diff --git a/src/methods/jn_ap_op2/config.vsh.yaml b/src/methods/jn_ap_op2/config.vsh.yaml index 64717d1a..91c1c19c 100644 --- a/src/methods/jn_ap_op2/config.vsh.yaml +++ b/src/methods/jn_ap_op2/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/comp_method.yaml name: jn_ap_op2 +label: JN-AP-OP2 info: - label: JN-AP-OP2 neurips2023_rank: 20 summary: "Deep learning architecture composed of 2 modules: a sample-centric MLP and a gene-centric MLP" description: | @@ -31,7 +31,7 @@ resources: - path: helper.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/lgc_ensemble/config.vsh.yaml b/src/methods/lgc_ensemble/config.vsh.yaml index 389f3ee9..5bdb4204 100644 --- 
a/src/methods/lgc_ensemble/config.vsh.yaml +++ b/src/methods/lgc_ensemble/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/wf_method.yaml name: lgc_ensemble +label: LSTM-GRU-CNN Ensemble info: - label: LSTM-GRU-CNN Ensemble neurips2023_rank: 1 summary: An ensemble of LSTM, GRU, and 1D CNN models description: | diff --git a/src/methods/lgc_ensemble_direct/config.vsh.yaml b/src/methods/lgc_ensemble_direct/config.vsh.yaml index 8208986b..34e27f37 100644 --- a/src/methods/lgc_ensemble_direct/config.vsh.yaml +++ b/src/methods/lgc_ensemble_direct/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/comp_method.yaml name: lgc_ensemble_direct +label: LSTM-GRU-CNN Ensemble info: - label: LSTM-GRU-CNN Ensemble neurips2023_rank: 1 summary: An ensemble of LSTM, GRU, and 1D CNN models description: | @@ -54,7 +54,7 @@ resources: engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/lgc_ensemble_predict/config.vsh.yaml b/src/methods/lgc_ensemble_predict/config.vsh.yaml index 3715a6b4..666d2d0f 100644 --- a/src/methods/lgc_ensemble_predict/config.vsh.yaml +++ b/src/methods/lgc_ensemble_predict/config.vsh.yaml @@ -33,7 +33,7 @@ resources: engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/lgc_ensemble_prepare/config.vsh.yaml b/src/methods/lgc_ensemble_prepare/config.vsh.yaml index f4e14ba4..60e56b0e 100644 --- a/src/methods/lgc_ensemble_prepare/config.vsh.yaml +++ b/src/methods/lgc_ensemble_prepare/config.vsh.yaml @@ -56,7 +56,7 @@ resources: engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/lgc_ensemble_train/config.vsh.yaml 
b/src/methods/lgc_ensemble_train/config.vsh.yaml index fe2e31a7..f963d9ad 100644 --- a/src/methods/lgc_ensemble_train/config.vsh.yaml +++ b/src/methods/lgc_ensemble_train/config.vsh.yaml @@ -46,7 +46,7 @@ resources: engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml index bb25d2cd..58f15e88 100644 --- a/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml +++ b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/comp_method.yaml name: nn_retraining_with_pseudolabels +label: NN retraining with pseudolabels info: - label: NN retraining with pseudolabels neurips2023_rank: 3 summary: Neural networks with pseudolabeling and ensemble modelling description: | @@ -40,7 +40,7 @@ resources: engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/tensorflow_nvidia:1.1.0 + image: openproblems/base_tensorflow_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/pyboost/config.vsh.yaml b/src/methods/pyboost/config.vsh.yaml index 87bfd9dc..fa5bea39 100644 --- a/src/methods/pyboost/config.vsh.yaml +++ b/src/methods/pyboost/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/comp_method.yaml name: pyboost +label: Py-boost info: - label: Py-boost neurips2023_rank: 18 summary: "Py-boost predicting t-scores" description: | @@ -38,7 +38,7 @@ resources: - path: ../../utils/anndata_to_dataframe.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/scape/config.vsh.yaml b/src/methods/scape/config.vsh.yaml index cc336bc1..14222f55 100644 --- a/src/methods/scape/config.vsh.yaml +++ 
b/src/methods/scape/config.vsh.yaml @@ -1,7 +1,7 @@ __merge__: ../../api/comp_method.yaml name: scape +label: ScAPE info: - label: ScAPE neurips2023_rank: 16 summary: Neural network model for drug effect prediction description: | @@ -63,7 +63,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/tensorflow_nvidia:1.1.0 + image: openproblems/base_tensorflow_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/methods/transformer_ensemble/config.vsh.yaml b/src/methods/transformer_ensemble/config.vsh.yaml index 8f609c28..e77173ea 100644 --- a/src/methods/transformer_ensemble/config.vsh.yaml +++ b/src/methods/transformer_ensemble/config.vsh.yaml @@ -1,8 +1,8 @@ __merge__: ../../api/comp_method.yaml name: transformer_ensemble +label: Transformer Ensemble info: - label: Transformer Ensemble neurips2023_rank: 2 summary: An ensemble of four transformer models, trained on diverse feature sets, with a cluster-based sampling strategy and robust validation for optimal performance. 
description: | @@ -45,7 +45,7 @@ resources: - path: train.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/pytorch_nvidia:1.1.0 + image: openproblems/base_pytorch_nvidia:1.0.0 setup: - type: python packages: diff --git a/src/metrics/mean_rowwise_correlation/config.vsh.yaml b/src/metrics/mean_rowwise_correlation/config.vsh.yaml index 0c227651..faeec66a 100644 --- a/src/metrics/mean_rowwise_correlation/config.vsh.yaml +++ b/src/metrics/mean_rowwise_correlation/config.vsh.yaml @@ -55,7 +55,7 @@ resources: path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r packages: proxyC diff --git a/src/metrics/mean_rowwise_error/config.vsh.yaml b/src/metrics/mean_rowwise_error/config.vsh.yaml index b25a1117..dc385eb6 100644 --- a/src/metrics/mean_rowwise_error/config.vsh.yaml +++ b/src/metrics/mean_rowwise_error/config.vsh.yaml @@ -39,7 +39,7 @@ resources: path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r packages: proxyC diff --git a/src/process_dataset/add_uns_metadata/config.vsh.yaml b/src/process_dataset/add_uns_metadata/config.vsh.yaml index 4493c95a..66c5f6e8 100644 --- a/src/process_dataset/add_uns_metadata/config.vsh.yaml +++ b/src/process_dataset/add_uns_metadata/config.vsh.yaml @@ -51,7 +51,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 runners: - type: executable - type: nextflow diff --git a/src/process_dataset/bootstrap/config.vsh.yaml b/src/process_dataset/bootstrap/config.vsh.yaml index 5b8e081a..2fc13a96 100644 --- a/src/process_dataset/bootstrap/config.vsh.yaml +++ b/src/process_dataset/bootstrap/config.vsh.yaml @@ -54,7 +54,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: 
openproblems/base_python:1.0.0 runners: - type: executable - type: nextflow diff --git a/src/process_dataset/compute_pseudobulk/config.vsh.yaml b/src/process_dataset/compute_pseudobulk/config.vsh.yaml index 71ed2f3f..f8d93fef 100644 --- a/src/process_dataset/compute_pseudobulk/config.vsh.yaml +++ b/src/process_dataset/compute_pseudobulk/config.vsh.yaml @@ -23,7 +23,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ pyarrow ] diff --git a/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml index cc87d0ba..64796156 100644 --- a/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml @@ -39,7 +39,7 @@ resources: - path: ../../utils/anndata_to_dataframe.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet, pandas ] diff --git a/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml index c65c8530..245ac5d4 100644 --- a/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml @@ -77,7 +77,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 setup: - type: python packages: [ fastparquet, pandas ] diff --git a/src/process_dataset/filter_obs/config.vsh.yaml b/src/process_dataset/filter_obs/config.vsh.yaml index 3dae29db..c768d245 100644 --- a/src/process_dataset/filter_obs/config.vsh.yaml +++ b/src/process_dataset/filter_obs/config.vsh.yaml @@ -23,7 +23,7 @@ resources: path: script.R engines: - type: docker - image: 
ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r cran: [ dplyr, tidyr, purrr, tibble ] diff --git a/src/process_dataset/filter_vars/config.vsh.yaml b/src/process_dataset/filter_vars/config.vsh.yaml index fb0fbfbc..aa021f05 100644 --- a/src/process_dataset/filter_vars/config.vsh.yaml +++ b/src/process_dataset/filter_vars/config.vsh.yaml @@ -23,7 +23,7 @@ resources: path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r cran: [ edgeR, limma, dplyr, tidyr, purrr, tibble ] diff --git a/src/process_dataset/generate_id_map/config.vsh.yaml b/src/process_dataset/generate_id_map/config.vsh.yaml index b077035b..c90eec91 100644 --- a/src/process_dataset/generate_id_map/config.vsh.yaml +++ b/src/process_dataset/generate_id_map/config.vsh.yaml @@ -23,7 +23,7 @@ resources: path: script.py engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/python:1.1.0 + image: openproblems/base_python:1.0.0 runners: - type: executable - type: nextflow diff --git a/src/process_dataset/run_limma/config.vsh.yaml b/src/process_dataset/run_limma/config.vsh.yaml index dd328b65..b995a9aa 100644 --- a/src/process_dataset/run_limma/config.vsh.yaml +++ b/src/process_dataset/run_limma/config.vsh.yaml @@ -45,7 +45,7 @@ resources: path: script.R engines: - type: docker - image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + image: openproblems/base_r:1.0.0 setup: - type: r bioc: [ edgeR, limma, dplyr, tidyr, purrr, tibble, furrr, future ] diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index f68b2542..aff8e19f 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -66,7 +66,7 @@ resources: path: "../../api/task_info.yaml" dependencies: - name: common/extract_metadata - repository: openproblemsv2 + repository: openproblems - name: 
control_methods/zeros - name: control_methods/sample - name: control_methods/ground_truth @@ -82,9 +82,9 @@ dependencies: - name: metrics/mean_rowwise_error - name: metrics/mean_rowwise_correlation repositories: - - name: openproblemsv2 + - name: openproblems type: github - repo: openproblems-bio/openproblems-v2 + repo: openproblems-bio/openproblems tag: main_build runners: - type: executable