From a13201f468f3a9672d759719aa6354a1e49a0019 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 18 Apr 2024 16:02:36 +0200 Subject: [PATCH] Refactor project structure (#5) * move components * fix config * update readme * fix dependencies after refactor * add build ci script * add test ci script * add dependabot * refactor generation script * change metric output to anndata * wip add unit test to components * fix resources path * also run method and metric in resource script * remove unneeded scripts * update paths * fix metric * add comment * fix example path * add file types to api files * improve unit test * finetune sync * fix api file * add changelog and PR template * don't publish h5ad files * rename zeros component * fix script * add benchmark wf * fix workflows, api and scripts * add download resources script * update readme * add missing metric info * add missing dataset_summary * remove trace before run * fix missing dataset_id * move tracing --- .github/PULL_REQUEST_TEMPLATE.md | 17 + .github/dependabot.yml | 6 + .github/workflows/build.yml | 93 ++++ .github/workflows/test.yml | 107 +++++ .gitignore | 4 +- CHANGELOG.md | 10 + README.md | 451 +++++++++++++++++- _viash.yaml | 2 +- scripts/1_sync_resources.sh | 5 - scripts/2_render_readme.sh | 31 -- scripts/4_process_dataset.sh | 56 --- scripts/5_run_rf_method.sh | 8 - scripts/6_run_rf_metric.sh | 9 - scripts/7_run_baseline_methods.sh | 21 - ...uild_components.sh => build_components.sh} | 0 scripts/download_resources.sh | 9 + scripts/generate_resources.sh | 61 +++ scripts/render_readme.sh | 11 + scripts/run_benchmark.sh | 27 ++ scripts/test_components.sh | 3 + .../component_tests/run_and_check_output.py | 143 ++++++ src/dge_perturbation_prediction/README.md | 366 -------------- .../api/file_id_map.yaml | 19 - .../api/file_lincs_id_compound_mapping.yaml | 24 - .../api/file_prediction.yaml | 16 - .../api/task_info.yaml | 18 - .../metrics/mean_rowwise_rmse/script.py | 33 -- .../process_dataset/workflow/config.vsh.yaml | 64 --- .../api/comp_control_method.yaml | 12 +- .../api/comp_method.yaml | 10 +- .../api/comp_metric.yaml | 10 +- .../api/comp_process_dataset.yaml | 15 +- .../api/file_de_test_h5ad.yaml} | 1 + src/task/api/file_de_test_parquet.yaml | 45 ++ .../api/file_de_train_h5ad.yaml} | 1 + src/task/api/file_de_train_parquet.yaml | 41 ++ src/task/api/file_id_map.yaml | 19 + .../api/file_lincs_id_compound_mapping.yaml | 24 + src/task/api/file_prediction.yaml | 11 + .../api/file_sc_counts.yaml | 1 + .../api/file_score.yaml | 3 +- src/task/api/task_info.yaml | 97 ++++ .../ground_truth/config.vsh.yaml | 2 + .../control_methods/ground_truth/script.R | 0 .../control_methods/sample/config.vsh.yaml | 2 + .../control_methods/sample/script.R | 0 .../control_methods/zeros}/config.vsh.yaml | 9 +- .../control_methods/zeros}/script.py | 0 .../methods/random_forest/config.vsh.yaml | 2 + .../methods/random_forest/script.R | 0 .../metrics/mean_rowwise_rmse/config.vsh.yaml | 5 + src/task/metrics/mean_rowwise_rmse/script.py | 40 ++ .../clean_sc_counts/config.vsh.yaml | 2 +- .../process_dataset/clean_sc_counts/script.py | 0 .../compute_pseudobulk/config.vsh.yaml | 2 +- .../compute_pseudobulk/script.py | 0 .../convert_h5ad_to_parquet/config.vsh.yaml | 2 +- .../convert_h5ad_to_parquet/script.py | 0 .../process_dataset/run_limma/config.vsh.yaml | 2 +- .../process_dataset/run_limma/script.R | 0 .../process_dataset/workflow/config.vsh.yaml | 51 ++ .../process_dataset/workflow/main.nf | 40 +- .../workflows/run_benchmark/config.vsh.yaml | 87 ++++ 
src/task/workflows/run_benchmark/main.nf | 177 +++++++ 64 files changed, 1598 insertions(+), 729 deletions(-) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/test.yml create mode 100644 CHANGELOG.md delete mode 100755 scripts/1_sync_resources.sh delete mode 100755 scripts/2_render_readme.sh delete mode 100755 scripts/4_process_dataset.sh delete mode 100755 scripts/5_run_rf_method.sh delete mode 100755 scripts/6_run_rf_metric.sh delete mode 100755 scripts/7_run_baseline_methods.sh rename scripts/{3_build_components.sh => build_components.sh} (100%) create mode 100755 scripts/download_resources.sh create mode 100755 scripts/generate_resources.sh create mode 100755 scripts/render_readme.sh create mode 100755 scripts/run_benchmark.sh create mode 100755 scripts/test_components.sh create mode 100644 src/common/component_tests/run_and_check_output.py delete mode 100644 src/dge_perturbation_prediction/README.md delete mode 100644 src/dge_perturbation_prediction/api/file_id_map.yaml delete mode 100644 src/dge_perturbation_prediction/api/file_lincs_id_compound_mapping.yaml delete mode 100644 src/dge_perturbation_prediction/api/file_prediction.yaml delete mode 100644 src/dge_perturbation_prediction/api/task_info.yaml delete mode 100644 src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/script.py delete mode 100644 src/dge_perturbation_prediction/process_dataset/workflow/config.vsh.yaml rename src/{dge_perturbation_prediction => task}/api/comp_control_method.yaml (64%) rename src/{dge_perturbation_prediction => task}/api/comp_method.yaml (66%) rename src/{dge_perturbation_prediction => task}/api/comp_metric.yaml (65%) rename src/{dge_perturbation_prediction => task}/api/comp_process_dataset.yaml (63%) rename src/{dge_perturbation_prediction/api/file_de_test.yaml => task/api/file_de_test_h5ad.yaml} (99%) create mode 100644 src/task/api/file_de_test_parquet.yaml rename src/{dge_perturbation_prediction/api/file_de_train.yaml => task/api/file_de_train_h5ad.yaml} (99%) create mode 100644 src/task/api/file_de_train_parquet.yaml create mode 100644 src/task/api/file_id_map.yaml create mode 100644 src/task/api/file_lincs_id_compound_mapping.yaml create mode 100644 src/task/api/file_prediction.yaml rename src/{dge_perturbation_prediction => task}/api/file_sc_counts.yaml (99%) rename src/{dge_perturbation_prediction => task}/api/file_score.yaml (91%) create mode 100644 src/task/api/task_info.yaml rename src/{dge_perturbation_prediction => task}/control_methods/ground_truth/config.vsh.yaml (91%) rename src/{dge_perturbation_prediction => task}/control_methods/ground_truth/script.R (100%) rename src/{dge_perturbation_prediction => task}/control_methods/sample/config.vsh.yaml (90%) rename src/{dge_perturbation_prediction => task}/control_methods/sample/script.R (100%) rename src/{dge_perturbation_prediction/control_methods/baseline_zero => task/control_methods/zeros}/config.vsh.yaml (70%) rename src/{dge_perturbation_prediction/control_methods/baseline_zero => task/control_methods/zeros}/script.py (100%) rename src/{dge_perturbation_prediction => task}/methods/random_forest/config.vsh.yaml (92%) rename src/{dge_perturbation_prediction => task}/methods/random_forest/script.R (100%) rename src/{dge_perturbation_prediction => task}/metrics/mean_rowwise_rmse/config.vsh.yaml (89%) create mode 100644 src/task/metrics/mean_rowwise_rmse/script.py rename src/{dge_perturbation_prediction => 
task}/process_dataset/clean_sc_counts/config.vsh.yaml (94%) rename src/{dge_perturbation_prediction => task}/process_dataset/clean_sc_counts/script.py (100%) rename src/{dge_perturbation_prediction => task}/process_dataset/compute_pseudobulk/config.vsh.yaml (93%) rename src/{dge_perturbation_prediction => task}/process_dataset/compute_pseudobulk/script.py (100%) rename src/{dge_perturbation_prediction => task}/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml (95%) rename src/{dge_perturbation_prediction => task}/process_dataset/convert_h5ad_to_parquet/script.py (100%) rename src/{dge_perturbation_prediction => task}/process_dataset/run_limma/config.vsh.yaml (95%) rename src/{dge_perturbation_prediction => task}/process_dataset/run_limma/script.R (100%) create mode 100644 src/task/process_dataset/workflow/config.vsh.yaml rename src/{dge_perturbation_prediction => task}/process_dataset/workflow/main.nf (55%) create mode 100644 src/task/workflows/run_benchmark/config.vsh.yaml create mode 100644 src/task/workflows/run_benchmark/main.nf diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..67f3d482 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,17 @@ +## Describe your changes + + + +## Checklist before requesting a review +- [ ] I have performed a self-review of my code + +- Check the correct box. Does this PR contain: + - [ ] Breaking changes + - [ ] New functionality (new method, new metric, ...) + - [ ] Major changes + - [ ] Minor changes + - [ ] Bug fixes + +- [ ] Proposed changes are described in the CHANGELOG.md + +- [ ] CI Tests succeed and look good! diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..90963715 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..8b96d16c --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,93 @@ +name: build + +on: + push: + branches: [ 'main' ] + +jobs: + # phase 1 + list: + runs-on: ubuntu-latest + + outputs: + component_matrix: ${{ steps.set_matrix.outputs.matrix }} + + steps: + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v5 + + - name: Remove target folder from .gitignore + run: | + # allow publishing the target folder + sed -i '/^target.*/d' .gitignore + + - uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := 'main_build' + parallel: true + + - name: Deploy to target branch + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: . + publish_branch: main_build + + - id: ns_list + uses: viash-io/viash-actions/ns-list@v5 + with: + platform: docker + src: src + format: json + + - id: set_matrix + run: | + echo "matrix=$(jq -c '[ .[] | + { + "name": (.functionality.namespace + "/" + .functionality.name), + "dir": .info.config | capture("^(?.*\/)").dir + } + ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT + + # phase 2 + build: + needs: list + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + component: ${{ fromJson(needs.list.outputs.component_matrix) }} + + steps: + # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
+ - uses: data-intuitive/reclaim-the-bytes@v2 + + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v5 + + - name: Build container + uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := 'main_build' + platform: docker + src: ${{ matrix.component.dir }} + setup: build + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.GTHB_USER }} + password: ${{ secrets.GTHB_PAT }} + + - name: Push container + uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := 'main_build' + platform: docker + src: ${{ matrix.component.dir }} + setup: push \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..d1892c8a --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,107 @@ +name: test + +on: + pull_request: + push: + branches: [ '**' ] + +jobs: + run_ci_check_job: + runs-on: ubuntu-latest + outputs: + run_ci: ${{ steps.github_cli.outputs.check }} + steps: + - name: 'Check if branch has an existing pull request and the trigger was a push' + id: github_cli + run: | + pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url') + # If the branch has a PR and this run was triggered by a push event, do not run + if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then + echo "check=false" >> $GITHUB_OUTPUT + else + echo "check=true" >> $GITHUB_OUTPUT + fi + env: + GITHUB_TOKEN: ${{ secrets.GTHB_PAT }} + + # phase 1 + list: + needs: run_ci_check_job + env: + s3_bucket: s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/ + runs-on: ubuntu-latest + if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }} + + outputs: + matrix: ${{ steps.set_matrix.outputs.matrix }} + cache_key: ${{ steps.cache.outputs.cache_key }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: viash-io/viash-actions/setup@v5 + + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + id: cache + with: + s3_bucket: $s3_bucket + dest_path: resources + cache_key_prefix: resources__ + + - id: ns_list + uses: viash-io/viash-actions/ns-list@v5 + with: + platform: docker + format: json + + - id: ns_list_filtered + uses: viash-io/viash-actions/project/detect-changed-components@v5 + with: + input_file: "${{ steps.ns_list.outputs.output_file }}" + + - id: set_matrix + run: | + echo "matrix=$(jq -c '[ .[] | + { + "name": (.functionality.namespace + "/" + .functionality.name), + "config": .info.config + } + ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT + + # phase 2 + viash_test: + needs: list + if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + component: ${{ fromJson(needs.list.outputs.matrix) }} + + steps: + # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
+ - uses: data-intuitive/reclaim-the-bytes@v2 + + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v5 + + # use cache + - name: Cache resources data + uses: actions/cache@v4 + timeout-minutes: 10 + with: + path: resources + key: ${{ needs.list.outputs.cache_key }} + + - name: Run test + timeout-minutes: 30 + run: | + VIASH_TEMP=$RUNNER_TEMP/viash viash test \ + "${{ matrix.component.config }}" \ + --cpus 2 \ + --memory "5gb" + diff --git a/.gitignore b/.gitignore index 52c256ce..67c0b581 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ work target .idea .vscode -.DS_Store \ No newline at end of file +.DS_Store +output +trace-* \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..f30f3be3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +# task-dge-perturbation-prediction 0.1.0 + +Initial release of the DGE Perturbation Prediction task. Initial components: + +* `src/task/process_dataset`: Compute the DGE data from the raw single-cell counts using Limma. +* `src/task/control_methods`: Baseline control methods: sample, ground_truth, baseline_zero. +* `src/task/methods`: DGE perturbation prediction methods: random_forest. +* `src/task/metrics`: Evaluation metrics: mean_rowwise_rmse. + + diff --git a/README.md b/README.md index bd088ab1..6febe0d8 100644 --- a/README.md +++ b/README.md @@ -1,45 +1,460 @@ -# task-dge-perturbation-prediction +# DGE Perturbation Prediction -This repository contains the code for the task of predicting the perturbation effects. -## Install + -You need to have Docker, Java, and Viash installed. Please follow [these instructions](https://openproblems.bio/documentation/fundamentals/requirements) to install all of the required dependencies. +Predicting how small molecules change gene expression in different cell +types. + +Path to source: +[`src/task`](https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/src/task) + +## Installation + +You need to have Docker, Java, and Viash installed. Follow +[these instructions](https://openproblems.bio/documentation/fundamentals/requirements) +to install the required dependencies. ## First steps -### 1. Clone this repository +To get started, you can run the following commands: ```bash git clone git@github.com:openproblems-bio/task-dge-perturbation-prediction.git -``` -### 2. Sync resources +cd task-dge-perturbation-prediction -```bash -scripts/1_sync_resources.sh +# download resources +scripts/download_resources.sh ``` -### 3. Build the Docker images +To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark: ```bash -scripts/3_build_components.sh +viash ns build --parallel --setup cachedbuild + +scripts/run_benchmark.sh ``` -### 4. Process the raw data +After adding a component, it is recommended to run the tests to ensure that the component is working correctly: ```bash -scripts/4_process_dataset.sh +viash ns test --parallel ``` -### 5. Run the baseline +Optionally, you can provide the `--query` argument to test only a subset of components: ```bash -scripts/5_run_rf_method.sh +viash ns test --parallel --query "component_name" ``` -### 6. Evaluate the baseline +## Motivation -```bash -scripts/6_run_rf_metric.sh +Human biology can be complex, in part due to the function and interplay +of the body’s approximately 37 trillion cells, which are organized into +tissues, organs, and systems. 
However, recent advances in single-cell +technologies have provided unparalleled insight into the function of +cells and tissues at the level of DNA, RNA, and proteins. Yet leveraging +single-cell methods to develop medicines requires mapping causal links +between chemical perturbations and the downstream impact on cell state. +These experiments are costly and labor intensive, and not all cells and +tissues are amenable to high-throughput transcriptomic screening. If +data science could help accurately predict chemical perturbations in new +cell types, it could accelerate and expand the development of new +medicines. + +Several methods have been developed for drug perturbation prediction, +most of which are variations on the autoencoder architecture (Dr.VAE, +scGEN, and ChemCPA). However, these methods lack proper benchmarking +datasets with diverse cell types to determine how well they generalize. +The largest available training dataset is the NIH-funded Connectivity +Map (CMap), which comprises over 1.3M small molecule perturbation +measurements. However, the CMap includes observations of only 978 genes, +less than 5% of all genes. Furthermore, the CMap data is comprised +almost entirely of measurements in cancer cell lines, which may not +accurately represent human biology. + +## Description + +This task aims to predict how small molecules change gene expression in +different cell types. This task was a [Kaggle +competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview) +as part of the [NeurIPS 2023 competition +track](https://neurips.cc/virtual/2023/competition/66586). + +The task is to predict the gene expression profile of a cell after a +small molecule perturbation. For this competition, we designed and +generated a novel single-cell perturbational dataset in human peripheral +blood mononuclear cells (PBMCs). We selected 144 compounds from the +Library of Integrated Network-Based Cellular Signatures (LINCS) +Connectivity Map dataset ([PMID: +29195078](https://pubmed.ncbi.nlm.nih.gov/29195078/)) and measured +single-cell gene expression profiles after 24 hours of treatment. The +experiment was repeated in three healthy human donors, and the compounds +were selected based on diverse transcriptional signatures observed in +CD34+ hematopoietic stem cells (data not released). We performed this +experiment in human PBMCs because the cells are commercially available +with pre-obtained consent for public release and PBMCs are a primary, +disease-relevant tissue that contains multiple mature cell types +(including T-cells, B-cells, myeloid cells, and NK cells) with +established markers for annotation of cell types. To supplement this +dataset, we also measured cells from each donor at baseline with joint +scRNA and single-cell chromatin accessibility measurements using the 10x +Multiome assay. We hope that the addition of rich multi-omic data for +each donor and cell type at baseline will help establish biological +priors that explain the susceptibility of particular genes to exhibit +perturbation responses in difference biological contexts. 
+ +## Authors & contributors + +| name | roles | +|:------------------|:-------| +| Artur Szałata | author | +| Robrecht Cannoodt | author | + +## API + +``` mermaid +flowchart LR + file_sc_counts("Single Cell Counts") + comp_process_dataset[/"Data processor"/] + file_de_train("DE train") + file_de_test("DE test") + file_id_map("ID Map") + comp_control_method[/"Control Method"/] + comp_method[/"Method"/] + comp_metric[/"Metric"/] + file_prediction("Prediction") + file_score("Score") + file_lincs_id_compound_mapping("Mapping compound names to lincs ids and smiles") + file_sc_counts---comp_process_dataset + comp_process_dataset-->file_de_train + comp_process_dataset-->file_de_test + comp_process_dataset-->file_id_map + file_de_train---comp_control_method + file_de_train---comp_method + file_de_test---comp_control_method + file_de_test---comp_metric + file_id_map---comp_control_method + file_id_map---comp_method + comp_control_method-->file_prediction + comp_method-->file_prediction + comp_metric-->file_score + file_prediction---comp_metric + file_lincs_id_compound_mapping---comp_process_dataset ``` + +## File format: Single Cell Counts + +Anndata with the counts of the whole dataset. + +Example file: `resources/neurips-2023-raw/sc_counts.h5ad` + +Format: + +
+
+    AnnData object
+     obs: 'dose_uM', 'timepoint_hr', 'raw_cell_id', 'hashtag_id', 'well', 'container_format', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name'
+     obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'
+     layers: 'counts'
+
+
+Slot description:
+
+ +| Slot | Type | Description | +|:--------------------------|:----------|:-----------------------------------------------| +| `obs["dose_uM"]` | `integer` | Dose in micromolar. | +| `obs["timepoint_hr"]` | `float` | Time point measured in hours. | +| `obs["raw_cell_id"]` | `string` | Original cell identifier. | +| `obs["hashtag_id"]` | `string` | Identifier for hashtag oligo. | +| `obs["well"]` | `string` | Well location in the plate. | +| `obs["container_format"]` | `string` | Format of the container (e.g., 96-well plate). | +| `obs["row"]` | `string` | Row in the plate. | +| `obs["col"]` | `integer` | Column in the plate. | +| `obs["plate_name"]` | `string` | Name of the plate. | +| `obs["cell_id"]` | `string` | Unique cell identifier. | +| `obs["cell_type"]` | `string` | Type of cell (e.g., B cells, T cells CD4+). | +| `obs["split"]` | `string` | Dataset split type (e.g., control, treated). | +| `obs["donor_id"]` | `string` | Identifier for the donor. | +| `obs["sm_name"]` | `string` | Name of the small molecule used for treatment. | +| `obsm["HTO_clr"]` | `matrix` | Corrected counts for hashing tags. | +| `obsm["X_pca"]` | `matrix` | Principal component analysis results. | +| `obsm["X_umap"]` | `matrix` | UMAP dimensionality reduction results. | +| `obsm["protein_counts"]` | `matrix` | Count data for proteins. | +| `layers["counts"]` | `matrix` | Raw count data for each gene across cells. | + +
+
+## Component type: Data processor
+
+Path:
+[`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset)
+
+A DGE regression dataset processor
+
+Arguments:
+
+
+| Name | Type | Description |
+|:------------------------------|:-------|:---------------------------------------------------------------------------------------------|
+| `--sc_counts` | `file` | Anndata with the counts of the whole dataset. |
+| `--lincs_id_compound_mapping` | `file` | Parquet file mapping compound names to lincs ids and smiles. |
+| `--de_train` | `file` | (*Output*) Differential expression results for training. |
+| `--de_test` | `file` | (*Output*) Differential expression results for testing. |
+| `--id_map` | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. |
+
+
+## File format: DE train
+
+Differential expression results for training.
+
+Example file: `resources/neurips-2023-data/de_train.h5ad`
+
+Format:
+
+
+    AnnData object
+     obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control'
+     layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval'
+
+
+Slot description:
+
+ +| Slot | Type | Description | +|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | +| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | +| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | +| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | +| `layers["P.Value"]` | `double` | P-value of the differential expression test. | +| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | +| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | +| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | +| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | +| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | + +
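Editor's illustration, not part of this patch: a minimal sketch of loading the DE train file described above and turning its `sign_log10_pval` layer into a training matrix. It assumes the `anndata` and `pandas` packages and the example path shown above.

```python
import anndata as ad
import pandas as pd

# Load the training data described above.
de_train = ad.read_h5ad("resources/neurips-2023-data/de_train.h5ad")

# The target layer; convert to dense in case it is stored as a sparse matrix.
target = de_train.layers["sign_log10_pval"]
if hasattr(target, "toarray"):
    target = target.toarray()

# One row per (cell_type, sm_name) pair, one column per gene.
y = pd.DataFrame(target, index=de_train.obs_names, columns=de_train.var_names)
print(y.shape)
print(de_train.obs[["cell_type", "sm_name"]].head())
```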
+
+## File format: DE test
+
+Differential expression results for testing.
+
+Example file: `resources/neurips-2023-data/de_test.h5ad`
+
+Format:
+
+
+    AnnData object
+     obs: 'id', 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control'
+     layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval'
+
+
+Slot description:
+
+ +| Slot | Type | Description | +|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `obs["id"]` | `integer` | Index of the test observation. | +| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | +| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | +| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | +| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | +| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | +| `layers["P.Value"]` | `double` | P-value of the differential expression test. | +| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | +| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | +| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | +| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | +| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | + +
+
+## File format: ID Map
+
+File indicates the order of de_test, the cell types and the small
+molecule names.
+
+Example file: `resources/neurips-2023-data/id_map.csv`
+
+Format:
+
+
+    AnnData object
+     obs: 'id', 'cell_type', 'sm_name'
+
+
+Slot description:
+
+
+| Slot | Type | Description |
+|:-------------------|:----------|:-------------------------------|
+| `obs["id"]` | `integer` | Index of the test observation. |
+| `obs["cell_type"]` | `string` | Cell type name. |
+| `obs["sm_name"]` | `string` | Small molecule name. |
+
+
+## Component type: Control Method
+
+Path:
+[`src/control_methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/control_methods)
+
+A control method.
+
+Arguments:
+
+
+| Name | Type | Description |
+|:-------------|:-------|:----------------------------------------------------------------------------------|
+| `--de_train` | `file` | Differential expression results for training. |
+| `--de_test` | `file` | Differential expression results for testing. |
+| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. |
+| `--output` | `file` | (*Output*) Differential Gene Expression prediction. |
+
+
+## Component type: Method
+
+Path:
+[`src/methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/methods)
+
+A regression method.
+
+Arguments:
+
+
+| Name | Type | Description |
+|:-------------|:-------|:----------------------------------------------------------------------------------|
+| `--de_train` | `file` | Differential expression results for training. |
+| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. |
+| `--output` | `file` | (*Output*) Differential Gene Expression prediction. |
+
+
+## Component type: Metric
+
+Path:
+[`src/metrics`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/metrics)
+
+A metric to compare two predictions.
+
+Arguments:
+
+
+| Name | Type | Description |
+|:---------------|:-------|:--------------------------------------------------|
+| `--de_test` | `file` | Differential expression results for testing. |
+| `--prediction` | `file` | Differential Gene Expression prediction. |
+| `--output` | `file` | (*Output*) File indicating the score of a metric. |
+
+
+## File format: Prediction
+
+Differential Gene Expression prediction
+
+Example file: `resources/neurips-2023-data/output_rf.parquet`
+
+Format:
+
+
+    AnnData object
+     obs: 'id'
+     layers: 'sign_log10_pval'
+
+
+Slot description:
+
+
+| Slot | Type | Description |
+|:----------------------------|:----------|:---------------------------------------------------------------------|
+| `obs["id"]` | `integer` | Index of the test observation. |
+| `layers["sign_log10_pval"]` | `double` | Predicted sign of the logFC times the log10 of the adjusted p-value. |
+
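Editor's illustration, not part of this patch: a minimal sketch of writing a Prediction file with the slots listed above, filled with zeros in the spirit of the `zeros` control method. It assumes `anndata`, `numpy`, and `pandas` are available and that the training and ID map files sit at the example paths used in this README.

```python
import anndata as ad
import numpy as np
import pandas as pd

de_train = ad.read_h5ad("resources/neurips-2023-data/de_train.h5ad")
id_map = pd.read_csv("resources/neurips-2023-data/id_map.csv")

# One row per entry in id_map, one column per gene, all zeros.
values = np.zeros((id_map.shape[0], de_train.n_vars), dtype="float32")

prediction = ad.AnnData(
    X=values,
    obs=pd.DataFrame({"id": id_map["id"].values}, index=id_map["id"].astype(str).values),
    var=de_train.var.copy(),
)
prediction.layers["sign_log10_pval"] = values.copy()
prediction.write_h5ad("prediction.h5ad")
```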
+
+## File format: Score
+
+File indicating the score of a metric.
+
+Example file: `resources/neurips-2023-data/score_rf.json`
+
+Format:
+
+
+    AnnData object
+     uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values'
+
+
+Slot description:
+
+
+| Slot | Type | Description |
+|:-----------------------|:---------|:-----------------------------------------------------------------------------------------------|
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["method_id"]` | `string` | A unique identifier for the method. |
+| `uns["metric_ids"]` | `string` | One or more unique metric identifiers. |
+| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. |
+
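Editor's illustration, not part of this patch: a rough sketch of how a row-wise RMSE could be computed and stored in the Score layout described above. This shows the idea only, not the repository's `mean_rowwise_rmse` implementation, and it assumes dense layers and that prediction rows are in the same order as `de_test`.

```python
import anndata as ad
import numpy as np

de_test = ad.read_h5ad("resources/neurips-2023-data/de_test.h5ad")
prediction = ad.read_h5ad("prediction.h5ad")

# Assumes both layers are dense and rows are aligned (same id order).
y_true = np.asarray(de_test.layers["sign_log10_pval"])
y_pred = np.asarray(prediction.layers["sign_log10_pval"])

# RMSE per test observation (row), averaged over all rows.
score_value = float(np.sqrt(((y_true - y_pred) ** 2).mean(axis=1)).mean())

score = ad.AnnData(
    uns={
        "dataset_id": str(de_test.uns.get("dataset_id", "unknown")),
        "method_id": "zeros",
        "metric_ids": ["mean_rowwise_rmse"],
        "metric_values": [score_value],
    }
)
score.write_h5ad("score.h5ad")
```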
+
+## File format: Mapping compound names to lincs ids and smiles
+
+Parquet file mapping compound names to lincs ids and smiles.
+
+Example file:
+`resources/neurips-2023-raw/lincs_id_compound_mapping.parquet`
+
+Format:
+
+
+    AnnData object
+     obs: 'compound_id', 'sm_lincs_id', 'sm_name', 'smiles'
+
+
+Slot description:
+
+
+| Slot | Type | Description |
+|:---------------------|:---------|:-------------------------------------------------------|
+| `obs["compound_id"]` | `string` | Unique identifier for the compound. |
+| `obs["sm_lincs_id"]` | `string` | LINCS identifier for the compound. |
+| `obs["sm_name"]` | `string` | Name of the compound. |
+| `obs["smiles"]` | `string` | SMILES notation representing the molecular structure. |
+
+ diff --git a/_viash.yaml b/_viash.yaml index 06703fc6..c48ca123 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -7,7 +7,7 @@ config_mods: | .functionality.version := 'dev' .functionality.arguments[.multiple == true].multiple_sep := ';' .platforms[.type == 'docker'].target_registry := 'ghcr.io' - .platforms[.type == 'docker'].target_organization := 'openproblems-bio' + .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task-dge-perturbation-prediction' .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task-dge-perturbation-prediction' .platforms[.type == "nextflow"].directives.tag := "$id" .platforms[.type == "nextflow"].auto.simplifyOutput := false diff --git a/scripts/1_sync_resources.sh b/scripts/1_sync_resources.sh deleted file mode 100755 index b6c6e227..00000000 --- a/scripts/1_sync_resources.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -mkdir -p resources/neurips-2023-raw/ -aws s3 cp s3://openproblems-bio/public/neurips-2023-competition/sc_counts.h5ad --no-sign-request resources/neurips-2023-raw/sc_counts.h5ad -aws s3 cp s3://saturn-kaggle-datasets/open-problems-single-cell-perturbations-optional/lincs_id_compound_mapping.parquet --no-sign-request resources/neurips-2023-raw/lincs_id_compound_mapping.parquet \ No newline at end of file diff --git a/scripts/2_render_readme.sh b/scripts/2_render_readme.sh deleted file mode 100755 index 29265cff..00000000 --- a/scripts/2_render_readme.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -e - -# create temp file and cleanup on exit -TMP_CONFIG=$(mktemp /tmp/nextflow.XXXXXX.config) -trap 'rm -f $TMP_CONFIG' EXIT - -# create temporary nextflow config file -cat > $TMP_CONFIG <> Downloading resources" +aws s3 sync --no-sign-request \ + "s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/" \ + "resources" \ + --delete diff --git a/scripts/generate_resources.sh b/scripts/generate_resources.sh new file mode 100755 index 00000000..246f33ea --- /dev/null +++ b/scripts/generate_resources.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +set -e + +IN=resources/neurips-2023-raw +OUT=resources/neurips-2023-data + +[[ ! -d $IN ]] && mkdir -p $IN + +if [[ ! -f "$IN/sc_counts.h5ad" ]]; then + echo ">> Downloading 'sc_counts.h5ad'" + aws s3 cp --no-sign-request \ + s3://openproblems-bio/public/neurips-2023-competition/sc_counts.h5ad \ + "$IN/sc_counts.h5ad" +fi +if [[ ! -f "$IN/lincs_id_compound_mapping.parquet" ]]; then + echo ">> Downloading 'lincs_id_compound_mapping.parquet'" + aws s3 cp --no-sign-request \ + s3://saturn-kaggle-datasets/open-problems-single-cell-perturbations-optional/lincs_id_compound_mapping.parquet \ + "$IN/lincs_id_compound_mapping.parquet" +fi + +echo ">> Running 'process_dataset' workflow" +nextflow run \ + target/nextflow/process_dataset/workflow/main.nf \ + -profile docker \ + -resume \ + --id neurips-2023-data \ + --sc_counts "$IN/sc_counts.h5ad" \ + --lincs_id_compound_mapping "$IN/lincs_id_compound_mapping.parquet" \ + --dataset_id "neurips-2023-data" \ + --dataset_name "NeurIPS2023 scPerturb DGE" \ + --dataset_url "TBD" \ + --dataset_reference "TBD" \ + --dataset_summary "Differential gene expression sign(logFC) * -log10(p-value) values after 24 hours of treatment with 144 compounds in human PBMCs" \ + --dataset_description "For this competition, we designed and generated a novel single-cell perturbational dataset in human peripheral blood mononuclear cells (PBMCs). 
We selected 144 compounds from the Library of Integrated Network-Based Cellular Signatures (LINCS) Connectivity Map dataset (PMID: 29195078) and measured single-cell gene expression profiles after 24 hours of treatment. The experiment was repeated in three healthy human donors, and the compounds were selected based on diverse transcriptional signatures observed in CD34+ hematopoietic stem cells (data not released). We performed this experiment in human PBMCs because the cells are commercially available with pre-obtained consent for public release and PBMCs are a primary, disease-relevant tissue that contains multiple mature cell types (including T-cells, B-cells, myeloid cells, and NK cells) with established markers for annotation of cell types. To supplement this dataset, we also measured cells from each donor at baseline with joint scRNA and single-cell chromatin accessibility measurements using the 10x Multiome assay. We hope that the addition of rich multi-omic data for each donor and cell type at baseline will help establish biological priors that explain the susceptibility of particular genes to exhibit perturbation responses in difference biological contexts." \ + --dataset_organism "homo_sapiens" \ + --output_state "state.yaml" \ + --publish_dir "$OUT" + +echo ">> Run method" +viash run src/task/control_methods/sample/config.vsh.yaml -- \ + --de_train "$OUT/de_train.parquet" \ + --de_test "$OUT/de_test.parquet" \ + --id_map "$OUT/id_map.csv" \ + --output "$OUT/prediction.parquet" + +echo ">> Run metric" +viash run src/task/metrics/mean_rowwise_rmse/config.vsh.yaml -- \ + --prediction "$OUT/prediction.parquet" \ + --de_test "$OUT/de_test.parquet" \ + --output "$OUT/score.h5ad" + +echo ">> Uploading results to S3" +aws s3 sync --profile op2 \ + --include "*" \ + --exclude "neurips-2023-raw/*" \ + --exclude "neurips-2023-public/*" \ + "resources" \ + "s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/" \ + --delete --dryrun diff --git a/scripts/render_readme.sh b/scripts/render_readme.sh new file mode 100755 index 00000000..b805cbea --- /dev/null +++ b/scripts/render_readme.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -e + +[[ ! -d ../openproblems-v2 ]] && echo "You need to clone the openproblems-v2 repository next to this repository" && exit 1 + +../openproblems-v2/target/docker/common/create_task_readme/create_task_readme \ + --task "dge_perturbation_prediction" \ + --task_dir "src/task" \ + --github_url "https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/" \ + --output "README.md" diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh new file mode 100755 index 00000000..0379f44d --- /dev/null +++ b/scripts/run_benchmark.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -e + +IN="resources" +OUT="output" + +[[ ! -d "$OUT" ]] && mkdir -p "$OUT" + +# run benchmark +export NXF_VER=23.04.2 + +nextflow run . 
\ + -main-script target/nextflow/workflows/run_benchmark/main.nf \ + -profile docker \ + -resume \ + --publish_dir "$OUT" \ + --output_state "state.yaml" \ + --id neurips-2023-data \ + --dataset_info "$IN/neurips-2023-data/dataset_info.yaml" \ + --de_train "$IN/neurips-2023-data/de_train.parquet" \ + --de_test "$IN/neurips-2023-data/de_test.parquet" \ + --id_map "$IN/neurips-2023-data/id_map.csv" + + # Alternatively: could also replace everything starting from '--id' with: + # -entry auto \ + # --input_states "$IN/**/state.yaml" \ No newline at end of file diff --git a/scripts/test_components.sh b/scripts/test_components.sh new file mode 100755 index 00000000..563e2c5e --- /dev/null +++ b/scripts/test_components.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +viash ns test --parallel \ No newline at end of file diff --git a/src/common/component_tests/run_and_check_output.py b/src/common/component_tests/run_and_check_output.py new file mode 100644 index 00000000..3cd6eb2b --- /dev/null +++ b/src/common/component_tests/run_and_check_output.py @@ -0,0 +1,143 @@ +import anndata as ad +import pandas as pd +import subprocess +from os import path +import yaml +import re + +## VIASH START +meta = { + "executable": "target/docker/denoising/methods/dca/dca", + "config": "target/docker/denoising/methods/dca/.config.vsh.yaml", + "resources_dir": "resources_test/denoising" +} +## VIASH END + +# helper functions +def check_h5ad_slots(adata, arg): + """Check whether an AnnData file contains all for the required + slots in the corresponding .info.slots field. + """ + for struc_name, items in arg["info"].get("slots", {}).items(): + struc_x = getattr(adata, struc_name) + + if struc_name == "X": + if items.get("required", True): + assert struc_x is not None,\ + f"File '{arg['value']}' is missing slot .{struc_name}" + + else: + for item in items: + if item.get("required", True): + assert item["name"] in struc_x,\ + f"File '{arg['value']}' is missing slot .{struc_name}['{item['name']}']" + +def check_df_columns(df, arg): + """Check whether a DataFrame contains all for the required + columns in the corresponding .info.columns field. 
+ """ + for item in arg["info"].get("columns", []): + if item.get("required", True): + assert item['name'] in df.columns,\ + f"File '{arg['value']}' is missing column '{item['name']}'" + +def run_and_check_outputs(arguments, cmd): + print(">> Checking whether input files exist", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "input": + assert path.exists(arg["value"]), f"Input file '{arg['value']}' does not exist" + + print(f">> Running script as test", flush=True) + out = subprocess.run(cmd, stderr=subprocess.STDOUT) + + if out.stdout: + print(out.stdout) + + if out.returncode: + print(f"script: \'{' '.join(cmd)}\' exited with an error.") + exit(out.returncode) + + print(">> Checking whether output file exists", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "output": + assert path.exists(arg["value"]), f"Output file '{arg['value']}' does not exist" + + print(">> Reading h5ad files and checking formats", flush=True) + for arg in arguments: + file_type = arg.get("info", {}).get("file_type", "h5ad") + if arg["type"] == "file": + if file_type == "h5ad" and "slots" in arg["info"]: + print(f"Reading and checking {arg['clean_name']}", flush=True) + + # try to read as an anndata, else as a parquet file + adata = ad.read_h5ad(arg["value"]) + + print(f" {adata}") + + check_h5ad_slots(adata, arg) + elif file_type in ["parquet", "csv"] and "columns" in arg["info"]: + print(f"Reading and checking {arg['clean_name']}", flush=True) + + if file_type == "csv": + df = pd.read_csv(arg["value"]) + else: + df = pd.read_parquet(arg["value"]) + print(f" {df}") + + check_df_columns(df, arg) + + + print("All checks succeeded!", flush=True) + + +# read viash config +with open(meta["config"], "r") as file: + config = yaml.safe_load(file) + +# get resources +arguments = [] + +for arg in config["functionality"]["arguments"]: + new_arg = arg.copy() + + # set clean name + clean_name = re.sub("^--", "", arg["name"]) + new_arg["clean_name"] = clean_name + + # use example to find test resource file + if arg["type"] == "file": + if arg["direction"] == "input": + value = f"{meta['resources_dir']}/{arg['example'][0]}" + else: + value = f"{clean_name}.h5ad" + new_arg["value"] = value + + arguments.append(new_arg) + + +if "test_setup" not in config["functionality"]["info"]: + argument_sets = {"run": arguments} +else: + test_setup = config["functionality"]["info"]["test_setup"] + argument_sets = {} + for name, test_instance in test_setup.items(): + new_arguments = [] + for arg in arguments: + new_arg = arg.copy() + if arg["clean_name"] in test_instance: + val = test_instance[arg["clean_name"]] + if new_arg["type"] == "file" and new_arg["direction"] == "input": + val = f"{meta['resources_dir']}/{val}" + new_arg["value"] = val + new_arguments.append(new_arg) + argument_sets[name] = new_arguments + +for argset_name, argset_args in argument_sets.items(): + print(f">> Running test '{argset_name}'", flush=True) + # construct command + cmd = [ meta["executable"] ] + for arg in argset_args: + if arg["type"] == "file": + cmd.extend([arg["name"], arg["value"]]) + + run_and_check_outputs(argset_args, cmd) \ No newline at end of file diff --git a/src/dge_perturbation_prediction/README.md b/src/dge_perturbation_prediction/README.md deleted file mode 100644 index d0f23255..00000000 --- a/src/dge_perturbation_prediction/README.md +++ /dev/null @@ -1,366 +0,0 @@ -# DGE Perturbation Prediction - - -DGE Perturbation Prediction - -Path: 
-[`src/dge_perturbation_prediction`](https://github.com/openproblems-bio/task-dge-perturbation-prediction/tree/main/src/dge_perturbation_prediction) - -## Motivation - -TODO: fill in - -## Description - -TODO: fill in - -## Authors & contributors - -| name | roles | -|:------------------|:-------| -| Artur Szałata | author | -| Robrecht Cannoodt | author | - -## API - -``` mermaid -flowchart LR - file_sc_counts("Single Cell Counts") - comp_process_dataset[/"Data processor"/] - file_de_train("DE train") - file_de_test("DE test") - file_id_map("ID Map") - comp_control_method[/"Control Method"/] - comp_method[/"Method"/] - comp_metric[/"Metric"/] - file_prediction("Prediction") - file_score("Score") - file_lincs_id_compound_mapping("Mapping compound names to lincs ids and smiles") - file_sc_counts---comp_process_dataset - comp_process_dataset-->file_de_train - comp_process_dataset-->file_de_test - comp_process_dataset-->file_id_map - file_de_train---comp_control_method - file_de_train---comp_method - file_de_test---comp_control_method - file_de_test---comp_metric - file_id_map---comp_control_method - file_id_map---comp_method - comp_control_method-->file_prediction - comp_method-->file_prediction - comp_metric-->file_score - file_prediction---comp_metric - file_lincs_id_compound_mapping---comp_process_dataset -``` - -## File format: Single Cell Counts - -Anndata with the counts of the whole dataset. - -Example file: `resources/neurips-2023-raw/sc_counts.h5ad` - -Format: - -
- - AnnData object - obs: 'dose_uM', 'timepoint_hr', 'raw_cell_id', 'hashtag_id', 'well', 'container_format', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name' - obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts' - layers: 'counts' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:--------------------------|:----------|:-----------------------------------------------| -| `obs["dose_uM"]` | `integer` | Dose in micromolar. | -| `obs["timepoint_hr"]` | `float` | Time point measured in hours. | -| `obs["raw_cell_id"]` | `string` | Original cell identifier. | -| `obs["hashtag_id"]` | `string` | Identifier for hashtag oligo. | -| `obs["well"]` | `string` | Well location in the plate. | -| `obs["container_format"]` | `string` | Format of the container (e.g., 96-well plate). | -| `obs["row"]` | `string` | Row in the plate. | -| `obs["col"]` | `integer` | Column in the plate. | -| `obs["plate_name"]` | `string` | Name of the plate. | -| `obs["cell_id"]` | `string` | Unique cell identifier. | -| `obs["cell_type"]` | `string` | Type of cell (e.g., B cells, T cells CD4+). | -| `obs["split"]` | `string` | Dataset split type (e.g., control, treated). | -| `obs["donor_id"]` | `string` | Identifier for the donor. | -| `obs["sm_name"]` | `string` | Name of the small molecule used for treatment. | -| `obsm["HTO_clr"]` | `matrix` | Corrected counts for hashing tags. | -| `obsm["X_pca"]` | `matrix` | Principal component analysis results. | -| `obsm["X_umap"]` | `matrix` | UMAP dimensionality reduction results. | -| `obsm["protein_counts"]` | `matrix` | Count data for proteins. | -| `layers["counts"]` | `matrix` | Raw count data for each gene across cells. | - -
- -## Component type: Data processor - -Path: -[`src/dge_perturbation_prediction`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/dge_perturbation_prediction) - -A DGE regression dataset processor - -Arguments: - -
- -| Name | Type | Description | -|:------------------------------|:-------|:---------------------------------------------------------------------------------------------| -| `--sc_counts` | `file` | Anndata with the counts of the whole dataset. | -| `--lincs_id_compound_mapping` | `file` | Parquet file mapping compound names to lincs ids and smiles. | -| `--de_train` | `file` | (*Output*) Differential expression results for training. | -| `--de_test` | `file` | (*Output*) Differential expression results for testing. | -| `--id_map` | `file` | (*Output*) File indicates the order of de_test, the cell types and the small molecule names. | - -
- -## File format: DE train - -Differential expression results for training. - -Example file: `resources/neurips-2023-data/de_train.h5ad` - -Format: - -
- - AnnData object - obs: 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control' - layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | -| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | -| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | -| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | -| `layers["P.Value"]` | `double` | P-value of the differential expression test. | -| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | -| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | -| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | -| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | -| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | - -
- -## File format: DE test - -Differential expression results for testing. - -Example file: `resources/neurips-2023-data/de_test.h5ad` - -Format: - -
- - AnnData object - obs: 'id', 'cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control' - layers: 'P.Value', 'adj.P.Value', 'is_de', 'is_de_adj', 'logFC', 'sign_log10_pval' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:----------------------------|:----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `obs["cell_type"]` | `string` | The annotated cell type of each cell based on RNA expression. | -| `obs["sm_name"]` | `string` | The primary name for the (parent) compound (in a standardized representation) as chosen by LINCS. This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["sm_lincs_id"]` | `string` | The global LINCS ID (parent) compound (in a standardized representation). This is provided to map the data in this experiment to the LINCS Connectivity Map data. | -| `obs["SMILES"]` | `string` | Simplified molecular-input line-entry system (SMILES) representations of the compounds used in the experiment. This is a 1D representation of molecular structure. These SMILES are provided by Cellarity based on the specific compounds ordered for this experiment. | -| `obs["split"]` | `string` | Split. Must be one of ‘control’, ‘train’, ‘public_test’, or ‘private_test’. | -| `obs["control"]` | `boolean` | Boolean indicating whether this instance was used as a control. | -| `layers["P.Value"]` | `double` | P-value of the differential expression test. | -| `layers["adj.P.Value"]` | `double` | Adjusted P-value of the differential expression test. | -| `layers["is_de"]` | `boolean` | Whether the gene is differentially expressed. | -| `layers["is_de_adj"]` | `boolean` | Whether the gene is differentially expressed after adjustment. | -| `layers["logFC"]` | `double` | Log fold change of the differential expression test. | -| `layers["sign_log10_pval"]` | `double` | Differential expression value (-log10(p-value) \* sign(LFC)) for each gene. Here, LFC is the estimated log-fold change in expression between the treatment and control condition after shrinkage as calculated by Limma. Positive LFC means the gene goes up in the treatment condition relative to the control. | - -
- -## File format: ID Map - -File indicates the order of de_test, the cell types and the small -molecule names. - -Example file: `resources/neurips-2023-data/id_map.csv` - -Format: - -
- - AnnData object - obs: 'id', 'cell_type', 'sm_name' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:-------------------|:----------|:-------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `obs["cell_type"]` | `string` | Cell type name. | -| `obs["sm_name"]` | `string` | Small molecule name. | - -
- -## Component type: Control Method - -Path: -[`src/dge_perturbation_prediction/control_methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/dge_perturbation_prediction/control_methods) - -A control method. - -Arguments: - -
- -| Name | Type | Description | -|:-------------|:-------|:----------------------------------------------------------------------------------| -| `--de_train` | `file` | Differential expression results for training. | -| `--de_test` | `file` | Differential expression results for testing. | -| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | -| `--output` | `file` | (*Output*) Differential Gene Expression prediction. | - -
- -## Component type: Method - -Path: -[`src/dge_perturbation_prediction/methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/dge_perturbation_prediction/methods) - -A regression method. - -Arguments: - -
- -| Name | Type | Description | -|:-------------|:-------|:----------------------------------------------------------------------------------| -| `--de_train` | `file` | Differential expression results for training. | -| `--id_map` | `file` | File indicates the order of de_test, the cell types and the small molecule names. | -| `--output` | `file` | (*Output*) Differential Gene Expression prediction. | - -
- -## Component type: Metric - -Path: -[`src/dge_perturbation_prediction/metrics`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/dge_perturbation_prediction/metrics) - -A metric to compare two predictions. - -Arguments: - -
- -| Name | Type | Description | -|:---------------|:-------|:--------------------------------------------------| -| `--de_test` | `file` | Differential expression results for testing. | -| `--prediction` | `file` | Differential Gene Expression prediction. | -| `--output` | `file` | (*Output*) File indicating the score of a metric. | - -
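As a worked illustration of the metric interface, the mean rowwise RMSE can be computed directly on the two wide tables. This sketch assumes the conventional definition (a square root per row, then an average over rows) and an `id` column plus one column per gene; it is not the repository's metric script.

```python
import numpy as np
import pandas as pd

def mean_rowwise_rmse(de_test: pd.DataFrame, prediction: pd.DataFrame) -> float:
    """Mean over rows of the per-row RMSE between observed and predicted values."""
    meta_cols = ["cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control"]
    genes = [c for c in de_test.columns if c not in meta_cols + ["id"]]

    # Align predictions to the test rows by id before comparing gene columns
    y_true = de_test.set_index("id")[genes]
    y_pred = prediction.set_index("id").loc[y_true.index, genes]

    rowwise_rmse = np.sqrt(((y_true - y_pred) ** 2).mean(axis=1))
    return float(rowwise_rmse.mean())
```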
- -## File format: Prediction - -Differential Gene Expression prediction - -Example file: `resources/neurips-2023-data/output_rf.parquet` - -Format: - -
- - AnnData object - obs: 'id' - layers: 'sign_log10_pval' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:----------------------------|:----------|:---------------------------------------------------------------------| -| `obs["id"]` | `integer` | Index of the test observation. | -| `layers["sign_log10_pval"]` | `double` | Predicted sign of the logFC times the log10 of the adjusted p-value. | - -
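A quick sanity check (illustrative only) is to verify that a prediction file lines up with the id map before scoring; the wide-parquet layout with an `id` column is an assumption based on the example path above.

```python
import pandas as pd

prediction = pd.read_parquet("resources/neurips-2023-data/output_rf.parquet")
id_map = pd.read_csv("resources/neurips-2023-data/id_map.csv")

# Every test observation should appear exactly once, in id_map order
assert list(prediction["id"]) == list(id_map["id"]), "prediction rows must follow id_map"
```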
- -## File format: Score - -File indicating the score of a metric. - -Example file: `resources/neurips-2023-data/score_rf.json` - -Format: - -
- - AnnData object - uns: 'dataset_id', 'method_id', 'metric_ids', 'metric_values' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:-----------------------|:---------|:---------------------------------------------------------------------------------------------| -| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | -| `uns["method_id"]` | `string` | A unique identifier for the method. | -| `uns["metric_ids"]` | `string` | One or more unique metric identifiers. | -| `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. | - -
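For illustration, such a score file can be read back with `anndata` and the metric ids paired with their values; the snippet assumes the ids and values are stored as (possibly single-element) lists and uses a hypothetical local path.

```python
import anndata as ad
import numpy as np

score = ad.read_h5ad("resources/neurips-2023-data/score.h5ad")

# uns holds plain metadata rather than matrices
metric_ids = np.atleast_1d(score.uns["metric_ids"])
metric_values = np.atleast_1d(score.uns["metric_values"])

for metric_id, value in zip(metric_ids, metric_values):
    print(score.uns["dataset_id"], score.uns["method_id"], metric_id, value)
```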
- -## File format: Mapping compound names to lincs ids and smiles - -Parquet file mapping compound names to lincs ids and smiles. - -Example file: -`resources/neurips-2023-raw/lincs_id_compound_mapping.parquet` - -Format: - -
- - AnnData object - obs: 'compound_id', 'sm_lincs_id', 'sm_name', 'smiles' - -
- -Slot description: - -
- -| Slot | Type | Description | -|:---------------------|:---------|:------------------------------------------------------| -| `obs["compound_id"]` | `string` | Unique identifier for the compound. | -| `obs["sm_lincs_id"]` | `string` | LINCS identifier for the compound. | -| `obs["sm_name"]` | `string` | Name of the compound. | -| `obs["smiles"]` | `string` | SMILES notation representing the molecular structure. | - -
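As a usage illustration (not part of the components), the mapping can be joined onto training metadata to attach LINCS ids and SMILES strings to compound names; the paths are the example paths and are assumed to exist locally.

```python
import pandas as pd

mapping = pd.read_parquet("resources/neurips-2023-raw/lincs_id_compound_mapping.parquet")
de_train = pd.read_parquet("resources/neurips-2023-data/de_train.parquet")

# Attach LINCS ids and SMILES to the training metadata by compound name
annotated = de_train[["cell_type", "sm_name"]].merge(
    mapping[["sm_name", "sm_lincs_id", "smiles"]], on="sm_name", how="left"
)
print(annotated.head())
```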
- diff --git a/src/dge_perturbation_prediction/api/file_id_map.yaml b/src/dge_perturbation_prediction/api/file_id_map.yaml deleted file mode 100644 index a4464e55..00000000 --- a/src/dge_perturbation_prediction/api/file_id_map.yaml +++ /dev/null @@ -1,19 +0,0 @@ -type: file -example: resources/neurips-2023-data/id_map.csv -info: - label: ID Map - summary: "File indicates the order of de_test, the cell types and the small molecule names." - slots: - obs: - - name: id - type: integer - description: Index of the test observation - required: true - - name: cell_type - type: string - description: "Cell type name" - required: true - - name: sm_name - type: string - description: "Small molecule name" - required: true diff --git a/src/dge_perturbation_prediction/api/file_lincs_id_compound_mapping.yaml b/src/dge_perturbation_prediction/api/file_lincs_id_compound_mapping.yaml deleted file mode 100644 index 166bfdb8..00000000 --- a/src/dge_perturbation_prediction/api/file_lincs_id_compound_mapping.yaml +++ /dev/null @@ -1,24 +0,0 @@ -type: file -example: resources/neurips-2023-raw/lincs_id_compound_mapping.parquet -info: - label: Mapping compound names to lincs ids and smiles - summary: "Parquet file mapping compound names to lincs ids and smiles." - slots: - obs: - - name: compound_id - description: "Unique identifier for the compound." - type: string - required: true - - name: sm_lincs_id - description: "LINCS identifier for the compound." - type: string - required: true - - name: sm_name - description: "Name of the compound." - type: string - required: true - - name: smiles - description: "SMILES notation representing the molecular structure." - type: string - required: true - diff --git a/src/dge_perturbation_prediction/api/file_prediction.yaml b/src/dge_perturbation_prediction/api/file_prediction.yaml deleted file mode 100644 index 5a5b13bb..00000000 --- a/src/dge_perturbation_prediction/api/file_prediction.yaml +++ /dev/null @@ -1,16 +0,0 @@ -type: file -example: resources/neurips-2023-data/output_rf.parquet -info: - label: Prediction - summary: "Differential Gene Expression prediction" - slots: - obs: - - name: id - type: integer - description: Index of the test observation - required: true - layers: - - name: sign_log10_pval - type: double - description: "Predicted sign of the logFC times the log10 of the adjusted p-value" - required: true diff --git a/src/dge_perturbation_prediction/api/task_info.yaml b/src/dge_perturbation_prediction/api/task_info.yaml deleted file mode 100644 index db8aa451..00000000 --- a/src/dge_perturbation_prediction/api/task_info.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: dge_perturbation_prediction -label: DGE Perturbation Prediction -summary: "DGE Perturbation Prediction" -motivation: | - TODO: fill in -description: | - TODO: fill in -authors: - - name: Artur Szałata - roles: [ author ] - info: - github: szalata - orcid: "000-0001-8413-234X" - - name: Robrecht Cannoodt - roles: [ author ] - info: - github: rcannood - orcid: "0000-0003-3641-729X" diff --git a/src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/script.py b/src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/script.py deleted file mode 100644 index 91509cf3..00000000 --- a/src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/script.py +++ /dev/null @@ -1,33 +0,0 @@ -import pandas as pd -import json - -## VIASH START -par = { - "de_test": "resources/neurips-2023-data/de_test.parquet", - "prediction": "resources/neurips-2023-data/output_rf.parquet", - "output": 
"resources/neurips-2023-data/score_rf.json", -} -## VIASH END - -de_test = pd.read_parquet(par["de_test"]).set_index('id') -prediction = pd.read_parquet(par["prediction"]).set_index('id') - -# subset to the same columns -genes = list(set(de_test.columns) - set(["cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control"])) -de_test = de_test.loc[:, genes] -prediction = prediction[genes] - -# compute mean_rowwise_rmse -mean_rowwise_rmse = 0 -for i in de_test.index: - mean_rowwise_rmse += ((de_test.iloc[i] - prediction.iloc[i])**2).mean() - -mean_rowwise_rmse /= de_test.shape[0] - -output = { - "mean_rowwise_rmse": mean_rowwise_rmse -} - -# write to file -with open(par["output"], 'w') as f: - json.dump(output, f) diff --git a/src/dge_perturbation_prediction/process_dataset/workflow/config.vsh.yaml b/src/dge_perturbation_prediction/process_dataset/workflow/config.vsh.yaml deleted file mode 100644 index ed8cd2f4..00000000 --- a/src/dge_perturbation_prediction/process_dataset/workflow/config.vsh.yaml +++ /dev/null @@ -1,64 +0,0 @@ -functionality: - name: workflow - namespace: dge_perturbation_prediction/process_dataset - info: - type: process_dataset - type_info: - label: Workflow - summary: Process the raw dataset - description: | - Process the raw dataset for the DGE regression task. - arguments: - - name: --sc_counts - type: file - required: true - direction: input - example: resources/neurips-2023-raw/sc_counts.h5ad - - name: --lincs_id_compound_mapping - type: file - required: true - direction: input - example: resources/neurips-2023-raw/lincs_id_compound_mapping.parquet - - name: --pseudobulk - type: file - required: true - direction: output - example: resources/neurips-2023-data/pseudobulk.h5ad - - name: --de_train_h5ad - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_train.h5ad - - name: --de_test_h5ad - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_test.h5ad - - name: --de_train_parquet - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_train.parquet - - name: --de_test_parquet - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_test.parquet - - name: --id_map - type: file - required: true - direction: output - example: resources/neurips-2023-data/id_map.csv - resources: - - type: nextflow_script - path: main.nf - entrypoint: run_wf - dependencies: - - name: dge_perturbation_prediction/process_dataset/clean_sc_counts - - name: dge_perturbation_prediction/process_dataset/compute_pseudobulk - - name: dge_perturbation_prediction/process_dataset/run_limma - - name: dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet -platforms: - - type: nextflow - directives: - label: [ midtime, midmem, lowcpu ] diff --git a/src/dge_perturbation_prediction/api/comp_control_method.yaml b/src/task/api/comp_control_method.yaml similarity index 64% rename from src/dge_perturbation_prediction/api/comp_control_method.yaml rename to src/task/api/comp_control_method.yaml index ab27090e..32df0cf2 100644 --- a/src/dge_perturbation_prediction/api/comp_control_method.yaml +++ b/src/task/api/comp_control_method.yaml @@ -1,5 +1,5 @@ functionality: - namespace: "dge_perturbation_prediction/control_methods" + namespace: "control_methods" info: type: control_method type_info: @@ -9,11 +9,11 @@ functionality: A control method to predict perturbation effects. 
arguments: - name: --de_train - __merge__: file_de_train.yaml + __merge__: file_de_train_parquet.yaml required: true direction: input - name: --de_test - __merge__: file_de_test.yaml + __merge__: file_de_test_parquet.yaml required: true direction: input - name: --id_map @@ -24,4 +24,8 @@ functionality: __merge__: file_prediction.yaml required: true direction: output - test_resources: [] \ No newline at end of file + test_resources: + - type: python_script + path: /src/common/component_tests/run_and_check_output.py + - path: /resources/neurips-2023-data + dest: resources/neurips-2023-data \ No newline at end of file diff --git a/src/dge_perturbation_prediction/api/comp_method.yaml b/src/task/api/comp_method.yaml similarity index 66% rename from src/dge_perturbation_prediction/api/comp_method.yaml rename to src/task/api/comp_method.yaml index 04da4560..75028247 100644 --- a/src/dge_perturbation_prediction/api/comp_method.yaml +++ b/src/task/api/comp_method.yaml @@ -1,5 +1,5 @@ functionality: - namespace: "dge_perturbation_prediction/methods" + namespace: "methods" info: type: method type_info: @@ -9,7 +9,7 @@ functionality: A regression method to predict the expression of one modality from another. arguments: - name: --de_train - __merge__: file_de_train.yaml + __merge__: file_de_train_parquet.yaml required: true direction: input - name: --id_map @@ -20,4 +20,8 @@ functionality: __merge__: file_prediction.yaml required: true direction: output - test_resources: [] \ No newline at end of file + test_resources: + - type: python_script + path: /src/common/component_tests/run_and_check_output.py + - path: /resources/neurips-2023-data + dest: resources/neurips-2023-data \ No newline at end of file diff --git a/src/dge_perturbation_prediction/api/comp_metric.yaml b/src/task/api/comp_metric.yaml similarity index 65% rename from src/dge_perturbation_prediction/api/comp_metric.yaml rename to src/task/api/comp_metric.yaml index d2eda929..c9322253 100644 --- a/src/dge_perturbation_prediction/api/comp_metric.yaml +++ b/src/task/api/comp_metric.yaml @@ -1,5 +1,5 @@ functionality: - namespace: "dge_perturbation_prediction/metrics" + namespace: "metrics" info: type: metric type_info: @@ -9,7 +9,7 @@ functionality: A metric to compare two predictions. 
arguments: - name: --de_test - __merge__: file_de_test.yaml + __merge__: file_de_test_parquet.yaml required: true direction: input - name: --prediction @@ -20,4 +20,8 @@ functionality: __merge__: file_score.yaml direction: output required: true - test_resources: [] \ No newline at end of file + test_resources: + - type: python_script + path: /src/common/component_tests/run_and_check_output.py + - path: /resources/neurips-2023-data + dest: resources/neurips-2023-data \ No newline at end of file diff --git a/src/dge_perturbation_prediction/api/comp_process_dataset.yaml b/src/task/api/comp_process_dataset.yaml similarity index 63% rename from src/dge_perturbation_prediction/api/comp_process_dataset.yaml rename to src/task/api/comp_process_dataset.yaml index c47a0a9a..991dfc29 100644 --- a/src/dge_perturbation_prediction/api/comp_process_dataset.yaml +++ b/src/task/api/comp_process_dataset.yaml @@ -1,12 +1,12 @@ functionality: - namespace: "dge_perturbation_prediction" + namespace: "process_dataset" info: type: process_dataset type_info: - label: Data processor - summary: A DGE regression dataset processor + label: Process dataset + summary: Process the raw dataset description: | - A DGE regression dataset processor to process the dataset for the DGE regression task. + Process the raw dataset for the DGE regression task. arguments: - name: --sc_counts __merge__: file_sc_counts.yaml @@ -17,15 +17,18 @@ functionality: required: true direction: input - name: --de_train - __merge__: file_de_train.yaml + __merge__: file_de_train_parquet.yaml required: true direction: output + default: de_train.parquet - name: --de_test - __merge__: file_de_test.yaml + __merge__: file_de_test_parquet.yaml required: true direction: output + default: de_test.parquet - name: --id_map __merge__: file_id_map.yaml required: true direction: output + default: id_map.csv test_resources: [] \ No newline at end of file diff --git a/src/dge_perturbation_prediction/api/file_de_test.yaml b/src/task/api/file_de_test_h5ad.yaml similarity index 99% rename from src/dge_perturbation_prediction/api/file_de_test.yaml rename to src/task/api/file_de_test_h5ad.yaml index 5ec8ec75..5ca5f264 100644 --- a/src/dge_perturbation_prediction/api/file_de_test.yaml +++ b/src/task/api/file_de_test_h5ad.yaml @@ -3,6 +3,7 @@ example: resources/neurips-2023-data/de_test.h5ad info: label: DE test summary: "Differential expression results for testing." + file_type: h5ad slots: obs: - name: id diff --git a/src/task/api/file_de_test_parquet.yaml b/src/task/api/file_de_test_parquet.yaml new file mode 100644 index 00000000..48e1a402 --- /dev/null +++ b/src/task/api/file_de_test_parquet.yaml @@ -0,0 +1,45 @@ +type: file +example: resources/neurips-2023-data/de_test.parquet +info: + label: DE test + summary: "Differential expression results for testing." + file_type: parquet + columns: + - name: id + type: integer + description: Index of the test observation + required: true + - name: cell_type + type: string + description: "The annotated cell type of each cell based on RNA expression." + required: true + - name: sm_name + type: string + description: | + The primary name for the (parent) compound (in a standardized representation) + as chosen by LINCS. This is provided to map the data in this experiment to + the LINCS Connectivity Map data. + required: true + - name: sm_lincs_id + type: string + description: | + The global LINCS ID (parent) compound (in a standardized representation). 
+ This is provided to map the data in this experiment to the LINCS Connectivity + Map data. + required: true + - name: SMILES + type: string + description: | + Simplified molecular-input line-entry system (SMILES) representations of the + compounds used in the experiment. This is a 1D representation of molecular + structure. These SMILES are provided by Cellarity based on the specific + compounds ordered for this experiment. + required: true + - name: split + type: string + description: "Split. Must be one of 'control', 'train', 'public_test', or 'private_test'" + required: true + - name: control + type: boolean + description: "Boolean indicating whether this instance was used as a control." + required: true diff --git a/src/dge_perturbation_prediction/api/file_de_train.yaml b/src/task/api/file_de_train_h5ad.yaml similarity index 99% rename from src/dge_perturbation_prediction/api/file_de_train.yaml rename to src/task/api/file_de_train_h5ad.yaml index 2ff05947..5f06fea3 100644 --- a/src/dge_perturbation_prediction/api/file_de_train.yaml +++ b/src/task/api/file_de_train_h5ad.yaml @@ -3,6 +3,7 @@ example: resources/neurips-2023-data/de_train.h5ad info: label: DE train summary: "Differential expression results for training." + file_type: h5ad slots: obs: - name: cell_type diff --git a/src/task/api/file_de_train_parquet.yaml b/src/task/api/file_de_train_parquet.yaml new file mode 100644 index 00000000..6d64b095 --- /dev/null +++ b/src/task/api/file_de_train_parquet.yaml @@ -0,0 +1,41 @@ +type: file +example: resources/neurips-2023-data/de_train.parquet +info: + label: DE train + summary: "Differential expression results for training." + file_type: parquet + columns: + - name: cell_type + type: string + description: "The annotated cell type of each cell based on RNA expression." + required: true + - name: sm_name + type: string + description: | + The primary name for the (parent) compound (in a standardized representation) + as chosen by LINCS. This is provided to map the data in this experiment to + the LINCS Connectivity Map data. + required: true + - name: sm_lincs_id + type: string + description: | + The global LINCS ID (parent) compound (in a standardized representation). + This is provided to map the data in this experiment to the LINCS Connectivity + Map data. + required: true + - name: SMILES + type: string + description: | + Simplified molecular-input line-entry system (SMILES) representations of the + compounds used in the experiment. This is a 1D representation of molecular + structure. These SMILES are provided by Cellarity based on the specific + compounds ordered for this experiment. + required: true + - name: split + type: string + description: "Split. Must be one of 'control', 'train', 'public_test', or 'private_test'" + required: true + - name: control + type: boolean + description: "Boolean indicating whether this instance was used as a control." + required: true diff --git a/src/task/api/file_id_map.yaml b/src/task/api/file_id_map.yaml new file mode 100644 index 00000000..19b9c043 --- /dev/null +++ b/src/task/api/file_id_map.yaml @@ -0,0 +1,19 @@ +type: file +example: resources/neurips-2023-data/id_map.csv +info: + label: ID Map + summary: "File indicates the order of de_test, the cell types and the small molecule names." 
+ file_type: csv + columns: + - name: id + type: integer + description: Index of the test observation + required: true + - name: cell_type + type: string + description: "Cell type name" + required: true + - name: sm_name + type: string + description: "Small molecule name" + required: true diff --git a/src/task/api/file_lincs_id_compound_mapping.yaml b/src/task/api/file_lincs_id_compound_mapping.yaml new file mode 100644 index 00000000..ca413cc5 --- /dev/null +++ b/src/task/api/file_lincs_id_compound_mapping.yaml @@ -0,0 +1,24 @@ +type: file +example: resources/neurips-2023-raw/lincs_id_compound_mapping.parquet +info: + label: Mapping compound names to lincs ids and smiles + summary: "Parquet file mapping compound names to lincs ids and smiles." + file_type: parquet + columns: + - name: compound_id + description: "Unique identifier for the compound." + type: string + required: true + - name: sm_lincs_id + description: "LINCS identifier for the compound." + type: string + required: true + - name: sm_name + description: "Name of the compound." + type: string + required: true + - name: smiles + description: "SMILES notation representing the molecular structure." + type: string + required: true + diff --git a/src/task/api/file_prediction.yaml b/src/task/api/file_prediction.yaml new file mode 100644 index 00000000..0a8bca07 --- /dev/null +++ b/src/task/api/file_prediction.yaml @@ -0,0 +1,11 @@ +type: file +example: resources/neurips-2023-data/prediction.parquet +info: + label: Prediction + summary: "Differential Gene Expression prediction" + file_type: parquet + columns: + - name: id + type: integer + description: Index of the test observation + required: true \ No newline at end of file diff --git a/src/dge_perturbation_prediction/api/file_sc_counts.yaml b/src/task/api/file_sc_counts.yaml similarity index 99% rename from src/dge_perturbation_prediction/api/file_sc_counts.yaml rename to src/task/api/file_sc_counts.yaml index 77e18a76..493cc6a2 100644 --- a/src/dge_perturbation_prediction/api/file_sc_counts.yaml +++ b/src/task/api/file_sc_counts.yaml @@ -3,6 +3,7 @@ example: resources/neurips-2023-raw/sc_counts.h5ad info: label: Single Cell Counts summary: "Anndata with the counts of the whole dataset." + file_type: h5ad slots: obs: - name: dose_uM diff --git a/src/dge_perturbation_prediction/api/file_score.yaml b/src/task/api/file_score.yaml similarity index 91% rename from src/dge_perturbation_prediction/api/file_score.yaml rename to src/task/api/file_score.yaml index 49e25abd..b81283a4 100644 --- a/src/dge_perturbation_prediction/api/file_score.yaml +++ b/src/task/api/file_score.yaml @@ -1,8 +1,9 @@ type: file -example: resources/neurips-2023-data/score_rf.json +example: resources/neurips-2023-data/score.h5ad info: label: Score summary: "File indicating the score of a metric." + file_type: h5ad slots: uns: - type: string diff --git a/src/task/api/task_info.yaml b/src/task/api/task_info.yaml new file mode 100644 index 00000000..ccef9ad0 --- /dev/null +++ b/src/task/api/task_info.yaml @@ -0,0 +1,97 @@ +name: dge_perturbation_prediction +label: DGE Perturbation Prediction +summary: Predicting how small molecules change gene expression in different cell types. +readme: | + ## Installation + + You need to have Docker, Java, and Viash installed. Follow + [these instructions](https://openproblems.bio/documentation/fundamentals/requirements) + to install the required dependencies. 
+ + ## First steps + + To get started, you can run the following commands: + + ```bash + git clone git@github.com:openproblems-bio/task-dge-perturbation-prediction.git + + cd task-dge-perturbation-prediction + + # download resources + scripts/download_resources.sh + ``` + + To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark: + + ```bash + viash ns build --parallel --setup cachedbuild + + scripts/run_benchmark.sh + ``` + + After adding a component, it is recommended to run the tests to ensure that the component is working correctly: + + ```bash + viash ns test --parallel + ``` + + Optionally, you can provide the `--query` argument to test only a subset of components: + + ```bash + viash ns test --parallel --query "component_name" + ``` +motivation: | + Human biology can be complex, in part due to the function and interplay of the body's + approximately 37 trillion cells, which are organized into tissues, organs, and systems. + However, recent advances in single-cell technologies have provided unparalleled insight + into the function of cells and tissues at the level of DNA, RNA, and proteins. Yet + leveraging single-cell methods to develop medicines requires mapping causal links + between chemical perturbations and the downstream impact on cell state. These experiments + are costly and labor intensive, and not all cells and tissues are amenable to + high-throughput transcriptomic screening. If data science could help accurately predict + chemical perturbations in new cell types, it could accelerate and expand the development + of new medicines. + + Several methods have been developed for drug perturbation prediction, most of which are + variations on the autoencoder architecture (Dr.VAE, scGEN, and ChemCPA). However, these + methods lack proper benchmarking datasets with diverse cell types to determine how well + they generalize. The largest available training dataset is the NIH-funded Connectivity + Map (CMap), which comprises over 1.3M small molecule perturbation measurements. However, + the CMap includes observations of only 978 genes, less than 5% of all genes. Furthermore, + the CMap data is comprised almost entirely of measurements in cancer cell lines, which + may not accurately represent human biology. +description: | + This task aims to predict how small molecules change gene expression in different cell + types. This task was a [Kaggle competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview) + as part of the [NeurIPS 2023 competition track](https://neurips.cc/virtual/2023/competition/66586). + + The task is to predict the gene expression profile of a cell after a small molecule + perturbation. For this competition, we designed and generated a novel single-cell + perturbational dataset in human peripheral blood mononuclear cells (PBMCs). We + selected 144 compounds from the Library of Integrated Network-Based Cellular Signatures + (LINCS) Connectivity Map dataset ([PMID: 29195078](https://pubmed.ncbi.nlm.nih.gov/29195078/)) + and measured single-cell gene + expression profiles after 24 hours of treatment. The experiment was repeated in three + healthy human donors, and the compounds were selected based on diverse transcriptional + signatures observed in CD34+ hematopoietic stem cells (data not released). 
We performed + this experiment in human PBMCs because the cells are commercially available with + pre-obtained consent for public release and PBMCs are a primary, disease-relevant tissue + that contains multiple mature cell types (including T-cells, B-cells, myeloid cells, + and NK cells) with established markers for annotation of cell types. To supplement this + dataset, we also measured cells from each donor at baseline with joint scRNA and + single-cell chromatin accessibility measurements using the 10x Multiome assay. We hope + that the addition of rich multi-omic data for each donor and cell type at baseline will + help establish biological priors that explain the susceptibility of particular genes to + exhibit perturbation responses in difference biological contexts. + +authors: + - name: Artur Szałata + roles: [ author ] + info: + github: szalata + orcid: "000-0001-8413-234X" + - name: Robrecht Cannoodt + roles: [ author ] + info: + github: rcannood + orcid: "0000-0003-3641-729X" diff --git a/src/dge_perturbation_prediction/control_methods/ground_truth/config.vsh.yaml b/src/task/control_methods/ground_truth/config.vsh.yaml similarity index 91% rename from src/dge_perturbation_prediction/control_methods/ground_truth/config.vsh.yaml rename to src/task/control_methods/ground_truth/config.vsh.yaml index e14ec846..f892c14a 100644 --- a/src/dge_perturbation_prediction/control_methods/ground_truth/config.vsh.yaml +++ b/src/task/control_methods/ground_truth/config.vsh.yaml @@ -16,6 +16,8 @@ platforms: setup: - type: r cran: [ arrow, dplyr ] + - type: python + packages: [ fastparquet ] - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/dge_perturbation_prediction/control_methods/ground_truth/script.R b/src/task/control_methods/ground_truth/script.R similarity index 100% rename from src/dge_perturbation_prediction/control_methods/ground_truth/script.R rename to src/task/control_methods/ground_truth/script.R diff --git a/src/dge_perturbation_prediction/control_methods/sample/config.vsh.yaml b/src/task/control_methods/sample/config.vsh.yaml similarity index 90% rename from src/dge_perturbation_prediction/control_methods/sample/config.vsh.yaml rename to src/task/control_methods/sample/config.vsh.yaml index f141418e..5a2dc093 100644 --- a/src/dge_perturbation_prediction/control_methods/sample/config.vsh.yaml +++ b/src/task/control_methods/sample/config.vsh.yaml @@ -16,6 +16,8 @@ platforms: setup: - type: r cran: [ arrow, dplyr ] + - type: python + packages: [ fastparquet ] - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/dge_perturbation_prediction/control_methods/sample/script.R b/src/task/control_methods/sample/script.R similarity index 100% rename from src/dge_perturbation_prediction/control_methods/sample/script.R rename to src/task/control_methods/sample/script.R diff --git a/src/dge_perturbation_prediction/control_methods/baseline_zero/config.vsh.yaml b/src/task/control_methods/zeros/config.vsh.yaml similarity index 70% rename from src/dge_perturbation_prediction/control_methods/baseline_zero/config.vsh.yaml rename to src/task/control_methods/zeros/config.vsh.yaml index a6537314..78ac5e05 100644 --- a/src/dge_perturbation_prediction/control_methods/baseline_zero/config.vsh.yaml +++ b/src/task/control_methods/zeros/config.vsh.yaml @@ -1,10 +1,11 @@ __merge__: ../../api/comp_control_method.yaml functionality: - name: baseline_zero + name: zeros info: - label: Zero baseline 
- summary: Baseline method that predicts no change for any gene. - description: Baseline. + label: Zeros + summary: Baseline method that predicts all zeros + description: | + Baseline method that predicts all zeros. resources: - type: python_script path: script.py diff --git a/src/dge_perturbation_prediction/control_methods/baseline_zero/script.py b/src/task/control_methods/zeros/script.py similarity index 100% rename from src/dge_perturbation_prediction/control_methods/baseline_zero/script.py rename to src/task/control_methods/zeros/script.py diff --git a/src/dge_perturbation_prediction/methods/random_forest/config.vsh.yaml b/src/task/methods/random_forest/config.vsh.yaml similarity index 92% rename from src/dge_perturbation_prediction/methods/random_forest/config.vsh.yaml rename to src/task/methods/random_forest/config.vsh.yaml index 05746d1c..a1393d51 100644 --- a/src/dge_perturbation_prediction/methods/random_forest/config.vsh.yaml +++ b/src/task/methods/random_forest/config.vsh.yaml @@ -18,6 +18,8 @@ platforms: setup: - type: r cran: [ ranger, arrow, pbapply ] + - type: python + packages: [ fastparquet ] - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/dge_perturbation_prediction/methods/random_forest/script.R b/src/task/methods/random_forest/script.R similarity index 100% rename from src/dge_perturbation_prediction/methods/random_forest/script.R rename to src/task/methods/random_forest/script.R diff --git a/src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/config.vsh.yaml b/src/task/metrics/mean_rowwise_rmse/config.vsh.yaml similarity index 89% rename from src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/config.vsh.yaml rename to src/task/metrics/mean_rowwise_rmse/config.vsh.yaml index 5e4fb0bb..a6693665 100644 --- a/src/dge_perturbation_prediction/metrics/mean_rowwise_rmse/config.vsh.yaml +++ b/src/task/metrics/mean_rowwise_rmse/config.vsh.yaml @@ -14,6 +14,11 @@ functionality: $$ where \\(R\\) is the number of scored rows, and \\(y_{ij}\\) and \\(\widehat{y}_{ij}\\) are the actual and predicted values, respectively, for row \\(i\\) and column \\(j\\), and \\(n\\) is the number of columns. + repository_url: "..." + documentation_url: "..." 
+ min: 0 + max: "+inf" + maximize: false resources: - type: python_script path: script.py diff --git a/src/task/metrics/mean_rowwise_rmse/script.py b/src/task/metrics/mean_rowwise_rmse/script.py new file mode 100644 index 00000000..b1ae03c6 --- /dev/null +++ b/src/task/metrics/mean_rowwise_rmse/script.py @@ -0,0 +1,40 @@ +import pandas as pd +import anndata as ad + +## VIASH START +par = { + "de_test": "resources/neurips-2023-data/de_test.parquet", + "prediction": "resources/neurips-2023-data/output_rf.parquet", + "output": "resources/neurips-2023-data/score.h5ad", +} +## VIASH END + +print("Load data", flush=True) +de_test = pd.read_parquet(par["de_test"]).set_index('id') +prediction = pd.read_parquet(par["prediction"]).set_index('id') + +print("Select genes", flush=True) +genes = list(set(de_test.columns) - set(["cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control"])) +de_test = de_test.loc[:, genes] +prediction = prediction[genes] + +print("Calculate mean rowwise RMSE", flush=True) +mean_rowwise_rmse = 0 +for i in de_test.index: + mean_rowwise_rmse += ((de_test.iloc[i] - prediction.iloc[i])**2).mean() + +mean_rowwise_rmse /= de_test.shape[0] + +print("Create output", flush=True) +output = ad.AnnData( + uns = { + # this info is not stored in the parquet files + "dataset_id": "unknown", + "method_id": "unknown", + "metric_ids": ["mean_rowwise_rmse"], + "metric_values": [mean_rowwise_rmse] + } +) + +print("Write output", flush=True) +output.write_h5ad(par["output"], compression="gzip") \ No newline at end of file diff --git a/src/dge_perturbation_prediction/process_dataset/clean_sc_counts/config.vsh.yaml b/src/task/process_dataset/clean_sc_counts/config.vsh.yaml similarity index 94% rename from src/dge_perturbation_prediction/process_dataset/clean_sc_counts/config.vsh.yaml rename to src/task/process_dataset/clean_sc_counts/config.vsh.yaml index a831fb11..f0b73642 100644 --- a/src/dge_perturbation_prediction/process_dataset/clean_sc_counts/config.vsh.yaml +++ b/src/task/process_dataset/clean_sc_counts/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: clean_sc_counts - namespace: dge_perturbation_prediction/process_dataset + namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/dge_perturbation_prediction/process_dataset/clean_sc_counts/script.py b/src/task/process_dataset/clean_sc_counts/script.py similarity index 100% rename from src/dge_perturbation_prediction/process_dataset/clean_sc_counts/script.py rename to src/task/process_dataset/clean_sc_counts/script.py diff --git a/src/dge_perturbation_prediction/process_dataset/compute_pseudobulk/config.vsh.yaml b/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml similarity index 93% rename from src/dge_perturbation_prediction/process_dataset/compute_pseudobulk/config.vsh.yaml rename to src/task/process_dataset/compute_pseudobulk/config.vsh.yaml index 725f8335..69599c74 100644 --- a/src/dge_perturbation_prediction/process_dataset/compute_pseudobulk/config.vsh.yaml +++ b/src/task/process_dataset/compute_pseudobulk/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: compute_pseudobulk - namespace: dge_perturbation_prediction/process_dataset + namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/dge_perturbation_prediction/process_dataset/compute_pseudobulk/script.py b/src/task/process_dataset/compute_pseudobulk/script.py similarity index 100% rename from src/dge_perturbation_prediction/process_dataset/compute_pseudobulk/script.py rename to 
src/task/process_dataset/compute_pseudobulk/script.py diff --git a/src/dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml b/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml similarity index 95% rename from src/dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml rename to src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml index 77cbbdae..31e742c5 100644 --- a/src/dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml +++ b/src/task/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: convert_h5ad_to_parquet - namespace: dge_perturbation_prediction/process_dataset + namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet/script.py b/src/task/process_dataset/convert_h5ad_to_parquet/script.py similarity index 100% rename from src/dge_perturbation_prediction/process_dataset/convert_h5ad_to_parquet/script.py rename to src/task/process_dataset/convert_h5ad_to_parquet/script.py diff --git a/src/dge_perturbation_prediction/process_dataset/run_limma/config.vsh.yaml b/src/task/process_dataset/run_limma/config.vsh.yaml similarity index 95% rename from src/dge_perturbation_prediction/process_dataset/run_limma/config.vsh.yaml rename to src/task/process_dataset/run_limma/config.vsh.yaml index 4b65bc90..034f38f9 100644 --- a/src/dge_perturbation_prediction/process_dataset/run_limma/config.vsh.yaml +++ b/src/task/process_dataset/run_limma/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: run_limma - namespace: dge_perturbation_prediction/process_dataset + namespace: process_dataset info: type: process_dataset type_info: diff --git a/src/dge_perturbation_prediction/process_dataset/run_limma/script.R b/src/task/process_dataset/run_limma/script.R similarity index 100% rename from src/dge_perturbation_prediction/process_dataset/run_limma/script.R rename to src/task/process_dataset/run_limma/script.R diff --git a/src/task/process_dataset/workflow/config.vsh.yaml b/src/task/process_dataset/workflow/config.vsh.yaml new file mode 100644 index 00000000..bfe623c6 --- /dev/null +++ b/src/task/process_dataset/workflow/config.vsh.yaml @@ -0,0 +1,51 @@ +__merge__: ../../api/comp_process_dataset.yaml +functionality: + name: workflow + arguments: + - name: "--dataset_id" + type: string + description: Unique identifier of the dataset. + required: true + - name: "--dataset_name" + type: string + description: Nicely formatted name. + required: true + - name: "--dataset_url" + type: string + description: Link to the original source of the dataset. + required: false + - name: "--dataset_reference" + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: "--dataset_summary" + type: string + description: Short description of the dataset. + required: true + - name: "--dataset_description" + type: string + description: Long description of the dataset. + required: true + - name: "--dataset_organism" + type: string + description: The organism of the dataset. + required: true + - name: --dataset_info + type: file + description: A yaml file containing the dataset information. 
+ required: true + direction: output + default: dataset_info.yaml + resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + dependencies: + - name: process_dataset/clean_sc_counts + - name: process_dataset/compute_pseudobulk + - name: process_dataset/run_limma + - name: process_dataset/convert_h5ad_to_parquet +platforms: + - type: nextflow + directives: + label: [ midtime, midmem, lowcpu ] diff --git a/src/dge_perturbation_prediction/process_dataset/workflow/main.nf b/src/task/process_dataset/workflow/main.nf similarity index 55% rename from src/dge_perturbation_prediction/process_dataset/workflow/main.nf rename to src/task/process_dataset/workflow/main.nf index 5947f588..7966f72e 100644 --- a/src/dge_perturbation_prediction/process_dataset/workflow/main.nf +++ b/src/task/process_dataset/workflow/main.nf @@ -11,9 +11,7 @@ workflow run_wf { input: "sc_counts", lincs_id_compound_mapping: "lincs_id_compound_mapping" ], - toState: [ - sc_counts_cleaned: "output" - ] + toState: [sc_counts_cleaned: "output"] ) | compute_pseudobulk.run( @@ -30,7 +28,7 @@ workflow run_wf { output_splits: ["train", "control", "public_test"] ] }, - toState: [ de_train_h5ad: "output" ] + toState: [de_train_h5ad: "output"] ) | run_limma.run( @@ -42,7 +40,7 @@ workflow run_wf { output_splits: ["private_test"] ] }, - toState: [ de_test_h5ad: "output" ] + toState: [de_test_h5ad: "output"] ) | convert_h5ad_to_parquet.run( @@ -51,19 +49,33 @@ workflow run_wf { input_test: "de_test_h5ad" ], toState: [ - de_train_parquet: "output_train", - de_test_parquet: "output_test", + de_train: "output_train", + de_test: "output_test", id_map: "output_id_map" ] ) - | setState ([ - "de_train_h5ad", - "de_test_h5ad", - "de_train_parquet", - "de_test_parquet", - "id_map" - ]) + | setState { id, state -> + def dataset_info = [ + dataset_id: state.dataset_id, + dataset_name: state.dataset_name, + dataset_summary: state.dataset_summary, + dataset_description: state.dataset_description, + dataset_url: state.dataset_url, + dataset_reference: state.dataset_reference, + dataset_organism: state.dataset_organism, + ] + def dataset_info_yaml_blob = toYamlBlob(dataset_info) + def dataset_info_file = tempFile("dataset_info.yaml") + dataset_info_file.write(dataset_info_yaml_blob) + + [ + de_train: state.de_train, + de_test: state.de_test, + id_map: state.id_map, + dataset_info: dataset_info_file + ] + } emit: output_ch diff --git a/src/task/workflows/run_benchmark/config.vsh.yaml b/src/task/workflows/run_benchmark/config.vsh.yaml new file mode 100644 index 00000000..a5e65643 --- /dev/null +++ b/src/task/workflows/run_benchmark/config.vsh.yaml @@ -0,0 +1,87 @@ +functionality: + name: "run_benchmark" + namespace: "workflows" + argument_groups: + - name: Inputs + arguments: + - name: "--dataset_info" + type: file + required: true + example: resources/neurips-2023-data/dataset_info.yaml + - name: "--de_train" + __merge__: ../../api/file_de_train_parquet.yaml + required: true + direction: input + - name: "--de_test" + __merge__: ../../api/file_de_test_parquet.yaml + required: true + direction: input + - name: "--id_map" + __merge__: ../../api/file_id_map.yaml + required: true + direction: input + - name: Outputs + arguments: + - name: "--scores" + type: file + required: true + direction: output + description: A yaml file containing the scores of each of the methods + default: score_uns.yaml + - name: "--method_configs" + type: file + required: true + direction: output + default: method_configs.yaml + - name: "--metric_configs" + type: 
file + required: true + direction: output + default: metric_configs.yaml + - name: "--dataset_uns" + type: file + required: true + direction: output + default: dataset_uns.yaml + - name: "--task_info" + type: file + required: true + direction: output + default: task_info.yaml + - name: Methods + arguments: + - name: "--method_ids" + type: string + multiple: true + description: A list of method ids to run. If not specified, all methods will be run. + resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + - type: file + path: "../../api/task_info.yaml" + dependencies: + # - name: common/check_dataset_schema + # repository: openproblemsv2 + - name: common/extract_metadata + repository: openproblemsv2 + - name: control_methods/zeros + - name: control_methods/sample + - name: control_methods/ground_truth + - name: methods/random_forest + - name: metrics/mean_rowwise_rmse + repositories: + - name: openproblemsv2 + type: github + repo: openproblems-bio/openproblems-v2 + tag: main_build +platforms: + - type: nextflow + config: + script: | + process.errorStrategy = 'ignore' + trace { + enabled = true + overwrite = true + file = "${params.publish_dir}/trace.txt" + } diff --git a/src/task/workflows/run_benchmark/main.nf b/src/task/workflows/run_benchmark/main.nf new file mode 100644 index 00000000..f45eaae6 --- /dev/null +++ b/src/task/workflows/run_benchmark/main.nf @@ -0,0 +1,177 @@ +workflow auto { + findStates(params, meta.config) + | meta.workflow.run( + auto: [publish: "state"] + ) +} + +workflow run_wf { + take: + input_ch + + main: + + // construct list of methods + methods = [ + ground_truth, + sample, + zeros, + random_forest + ] + + // construct list of metrics + metrics = [ + mean_rowwise_rmse + ] + + /* ************************** + * PREPARE DATASET AND TASK * + ************************** */ + dataset_ch = input_ch + + // store original id for later use + | map{ id, state -> + [id, state + ["_meta": [join_id: id]]] + } + + // read the dataset info + | map { id, state -> + def dataset_info = readYaml(state.dataset_info) + [id, state + [dataset_info: dataset_info]] + } + + /*************************** + * RUN METHODS AND METRICS * + ***************************/ + score_ch = dataset_ch + + // run all methods + | runEach( + components: methods, + + // only run the method if it is in the list of method_ids + filter: { id, state, comp -> + !state.method_ids || state.method_ids.contains(comp.config.functionality.name) + }, + + // define a new 'id' by appending the method name to the dataset id + id: { id, state, comp -> + id + "." + comp.config.functionality.name + }, + + // use 'fromState' to fetch the arguments the component requires from the overall state + fromState: { id, state, comp -> + def new_args = [ + de_train: state.de_train, + id_map: state.id_map, + ] + if (comp.config.functionality.info.type == "control_method") { + new_args.de_test = state.de_test + } + new_args + }, + + // use 'toState' to publish that component's outputs to the overall state + toState: { id, output, state, comp -> + state + [ + method_id: comp.config.functionality.name, + method_output: output.output + ] + } + ) + + // run all metrics + | runEach( + components: metrics, + id: { id, state, comp -> + id + "." 
+ comp.config.functionality.name + }, + // use 'fromState' to fetch the arguments the component requires from the overall state + fromState: [ + de_test: "de_test", + prediction: "method_output", + ], + // use 'toState' to publish that component's outputs to the overall state + toState: { id, output, state, comp -> + state + [ + metric_id: comp.config.functionality.name, + metric_output: output.output + ] + } + ) + + /****************************** + * GENERATE OUTPUT YAML FILES * + ******************************/ + + // extract and combine the dataset metadata + dataset_meta_ch = dataset_ch + | joinStates { ids, states -> + // combine the dataset info into one file + def dataset_uns = states.collect{it.dataset_info} + def dataset_uns_yaml_blob = toYamlBlob(dataset_uns) + def dataset_uns_file = tempFile("dataset_uns.yaml") + dataset_uns_file.write(dataset_uns_yaml_blob) + + ["output", [dataset_uns: dataset_uns_file]] + } + + output_ch = score_ch + + // extract the scores + | extract_metadata.run( + fromState: [input: "metric_output"], + toState: { id, output, state -> + state + [ + score_uns: readYaml(output.output).uns + ] + } + ) + + | joinStates { ids, states -> + // store the method configs in a file + def method_configs = methods.collect{it.config} + def method_configs_yaml_blob = toYamlBlob(method_configs) + def method_configs_file = tempFile("method_configs.yaml") + method_configs_file.write(method_configs_yaml_blob) + + // store the metric configs in a file + def metric_configs = metrics.collect{it.config} + def metric_configs_yaml_blob = toYamlBlob(metric_configs) + def metric_configs_file = tempFile("metric_configs.yaml") + metric_configs_file.write(metric_configs_yaml_blob) + + def task_info_file = meta.resources_dir.resolve("task_info.yaml") + + // store the scores in a file + def score_uns = states.collect{state -> + state.score_uns + [ + dataset_id: state.dataset_info.dataset_id, + method_id: state.method_id + ] + } + def score_uns_yaml_blob = toYamlBlob(score_uns) + def score_uns_file = tempFile("score_uns.yaml") + score_uns_file.write(score_uns_yaml_blob) + + def new_state = [ + method_configs: method_configs_file, + metric_configs: metric_configs_file, + task_info: task_info_file, + scores: score_uns_file, + _meta: states[0]._meta + ] + + ["output", new_state] + } + + // merge all of the output data + | mix(dataset_meta_ch) + | joinStates{ ids, states -> + def mergedStates = states.inject([:]) { acc, m -> acc + m } + [ids[0], mergedStates] + } + + emit: + output_ch +} \ No newline at end of file