-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75e84b7
commit 3e43c11
Showing
18 changed files
with
3,794 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# Model Architecture | ||
import torch | ||
import torch.nn as nn | ||
import torch.optim | ||
|
||
|
||
class CustomTransformer(nn.Module):
    """Transformer-encoder model over a dense feature vector.

    The input is projected to ``d_model`` with a linear layer, passed through
    a stack of ``nn.TransformerEncoderLayer`` blocks (GELU activation,
    dropout 0.3, ``batch_first=True``), layer-normalised and mapped to
    ``num_labels`` outputs by a final linear layer.

    The module is created on the default device; move it with
    ``model.to(device)`` like any other ``nn.Module``.

    Args:
        num_features: Size of the last dimension of the input tensor.
        num_labels: Size of the last dimension of the output tensor.
        d_model: Width of the encoder.
        num_heads: Number of attention heads (PyTorch requires it to divide
            ``d_model``).
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, num_features, num_labels, d_model=128, num_heads=8, num_layers=6):
        super().__init__()
        self.embedding = nn.Linear(num_features, d_model)
        self.norm = nn.LayerNorm(d_model)

        # Deliberately no ``device=`` argument here: pinning the encoder to
        # 'cuda' made construction fail on CPU-only machines and left the
        # encoder on a different device than the other sub-modules.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dropout=0.3,
            activation=nn.GELU(),
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer,
            enable_nested_tensor=True,
            num_layers=num_layers,
        )
        self.fc = nn.Linear(d_model, num_labels)

    def forward(self, x):
        """Return predictions for ``x``.

        Args:
            x: Tensor whose last dimension is ``num_features``.
                NOTE(review): with ``batch_first=True`` a 3-D input is read as
                (batch, seq, feature); a 2-D input is presumably treated by
                the encoder as one unbatched sequence - confirm which layout
                the caller intends.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``num_labels``.
        """
        x = self.embedding(x)
        x = self.transformer(x)
        x = self.norm(x)
        return self.fc(x)
|
||
|
||
class CustomTransformer_v3(nn.Module):  # mean + std
    """Two-branch transformer: sparse features plus target encodings.

    The input row is split column-wise into a leading block of sparse
    features and a trailing block of target encodings. Each block is embedded
    to ``d_model`` by its own linear layer, the two embeddings are
    concatenated and fused back to ``d_model``, and the result goes through a
    transformer encoder and a linear head.

    Args:
        num_features: Total number of input columns (sparse + target
            encodings).
        num_labels: Number of outputs per row.
        d_model: Width of the encoder.
        num_heads: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability inside the encoder layers.
        num_target_encodings: Number of trailing columns that hold target
            encodings. Defaults to ``18211 * 4`` (presumably 18211 targets
            times 4 encoding statistics - TODO confirm against the data
            pipeline).

    Raises:
        ValueError: If ``num_target_encodings`` is negative or exceeds
            ``num_features``.
    """

    def __init__(self, num_features, num_labels, d_model=128, num_heads=8, num_layers=6, dropout=0.3,
                 num_target_encodings=18211 * 4):
        super().__init__()
        if num_target_encodings < 0 or num_target_encodings > num_features:
            # Fail early with a readable message instead of an opaque
            # negative-dimension error from nn.Linear.
            raise ValueError(
                f"num_target_encodings ({num_target_encodings}) must be between 0 and "
                f"num_features ({num_features})"
            )
        self.num_target_encodings = num_target_encodings
        self.num_sparse_features = num_features - self.num_target_encodings

        self.sparse_feature_embedding = nn.Linear(self.num_sparse_features, d_model)
        self.target_encoding_embedding = nn.Linear(self.num_target_encodings, d_model)
        # A single LayerNorm instance; forward() applies it both before and
        # after the encoder, so the two applications share parameters.
        self.norm = nn.LayerNorm(d_model)

        self.concatenation_layer = nn.Linear(2 * d_model, d_model)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=num_heads,
                dropout=dropout,
                activation=nn.GELU(),
                batch_first=True,
            ),
            num_layers=num_layers,
        )
        self.fc = nn.Linear(d_model, num_labels)

    def forward(self, x):
        """Return predictions for a 2-D input of shape (rows, num_features).

        NOTE(review): the fused 2-D tensor is passed straight to the encoder,
        which presumably treats the rows as one unbatched sequence - confirm
        this is intended.
        """
        split = self.num_sparse_features
        sparse_embedded = self.sparse_feature_embedding(x[:, :split])
        target_embedded = self.target_encoding_embedding(x[:, split:])

        # Concatenate along the feature axis, then fuse back to d_model.
        fused = torch.cat((sparse_embedded, target_embedded), dim=1)
        fused = self.norm(self.concatenation_layer(fused))

        encoded = self.norm(self.transformer(fused))
        return self.fc(encoded)
|
||
|
||
class CustomMLP(nn.Module):
    """Plain feed-forward network with optional LayerNorm and dropout.

    Each hidden block is ``[LayerNorm] -> Linear -> ReLU -> [Dropout]``; the
    first block maps ``input_dim`` to ``hidden_dim`` and the following blocks
    keep ``hidden_dim``. A final linear layer maps to ``output_dim``.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of every hidden block.
        output_dim: Size of the last dimension of the output.
        num_layers: Number of hidden blocks. ``0`` yields a single linear
            layer from ``input_dim`` to ``output_dim``.
        dropout: Dropout probability; no Dropout modules are added when it is
            not greater than 0.
        layer_norm: Whether each block starts with a LayerNorm.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=6, dropout=0.3, layer_norm=True):
        super().__init__()
        blocks = []
        width = input_dim  # width of the tensor entering the next module

        for _ in range(num_layers):
            if layer_norm:
                blocks.append(nn.LayerNorm(width))
            blocks.append(nn.Linear(width, hidden_dim))
            blocks.append(nn.ReLU())
            if dropout > 0:
                blocks.append(nn.Dropout(p=dropout))
            width = hidden_dim

        self.model = nn.Sequential(*blocks)
        # Use the tracked width, not hidden_dim: they are equal once at least
        # one block exists, but with num_layers=0 the head must accept
        # input_dim or forward() fails with a shape mismatch.
        self.fc = nn.Linear(width, output_dim)

    def forward(self, x):
        """Apply the hidden blocks and the output layer to ``x``."""
        return self.fc(self.model(x))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
functionality: | ||
name: "sync_test_resources" | ||
namespace: "common" | ||
version: "dev" | ||
description: Synchronise the test resources from s3 to resources_test | ||
usage: | | ||
sync_test_resources | ||
sync_test_resources --input s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/ --output resources | ||
arguments: | ||
- name: "--input" | ||
alternatives: ["-i"] | ||
type: string | ||
description: "Path to the S3 bucket to sync from." | ||
default: "s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/" | ||
- name: "--output" | ||
alternatives: ["-o"] | ||
type: file | ||
default: resources | ||
direction: output | ||
description: "Path to the test resource directory." | ||
- name: "--quiet" | ||
type: boolean_true | ||
description: "Does not display the operations performed from the specified command." | ||
- name: "--dryrun" | ||
type: boolean_true | ||
description: "Displays the operations that would be performed using the specified command without actually running them." | ||
- name: "--delete" | ||
type: boolean_true | ||
description: "Files that exist in the destination but not in the source are deleted during sync." | ||
- name: "--exclude" | ||
type: "string" | ||
multiple: true | ||
description: Exclude all files or objects from the command that matches the specified pattern. | ||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
platforms: | ||
- type: docker | ||
image: "amazon/aws-cli:2.7.12" | ||
- type: native | ||
- type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash

# Sync a public S3 prefix to a local directory with `aws s3 sync`.
# Inputs are the par_* variables (defaults for standalone runs are set in the
# VIASH START/END block below):
#   par_input   - S3 URI to sync from
#   par_output  - local destination directory
#   par_quiet / par_dryrun / par_delete - "true" adds the matching aws flag
#   par_exclude - ':'-separated list of --exclude patterns (optional)

## VIASH START
par_input='s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/'
par_output='resources_test'
## VIASH END

# Optional flags forwarded verbatim to `aws s3 sync`.
extra_params=( )

if [ "$par_quiet" == "true" ]; then
  extra_params+=( "--quiet" )
fi
if [ "$par_dryrun" == "true" ]; then
  extra_params+=( "--dryrun" )
fi
if [ "$par_delete" == "true" ]; then
  extra_params+=( "--delete" )
fi

if [ ! -z ${par_exclude+x} ]; then
  # Split on ':' with `read -a` rather than an unquoted `for ... in $var`:
  # the unquoted form also applies pathname expansion, so a pattern such as
  # '*.h5ad' would be replaced by matching files in the working directory.
  # Scoping IFS to the `read` command also keeps it from leaking into the
  # rest of the script.
  IFS=":" read -r -a exclude_patterns <<< "$par_exclude"
  for var in "${exclude_patterns[@]}"; do
    extra_params+=( "--exclude" "$var" )
  done
fi

# Disable the use of the Amazon EC2 instance metadata service (IMDS).
# see https://florian.ec/blog/github-actions-awscli-errors/
# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465
export AWS_EC2_METADATA_DISABLED=true

# --no-sign-request: issue the request without signing it with credentials.
aws s3 sync "$par_input" "$par_output" --no-sign-request "${extra_params[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Open Problems – Single-Cell Perturbations 2nd Place Solution | ||
|
||
[Kaggle's Open Problems – Single-Cell Perturbations competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview). | ||
### Setup | ||
cuda version - cu121 | ||
python 3.8 | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
|
||
To train the models modify config_train.yaml and run: | ||
|
||
```bash | ||
python train.py --config config_train.yaml | ||
``` | ||
|
||
|
||
To run the models modify config_test.yaml and run: | ||
|
||
```bash | ||
python predict.py --config config_test.yaml | ||
``` | ||
|
||
Make sure you have a file named sample_submission.csv, which has the same | ||
format as the resulting dataframe but is filled with zeros. | ||
|
||
### Hardware | ||
1x Nvidia RTX 3080 mobile | ||
Windows 11 | ||
11th Gen Intel(R) Core(TM) i7-11800H | ||
|
||
|
||
## Sources | ||
1. [Single Cell Perturbations Kaggle 2nd place solution](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458738) | ||
2. [Lion PyTorch by lucidrains](https://github.com/lucidrains/lion-pytorch) | ||
3. [Understanding the Competition - Open Problems](https://www.kaggle.com/code/ayushs9020/understanding-the-competition-open-problems) | ||
4. [OP2 EDA Baseline by alexandervc](https://www.kaggle.com/code/alexandervc/op2-eda-baseline-s) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# The API specifies which type of component this is. | ||
# It contains specifications for: | ||
# - The input/output files | ||
# - Common parameters | ||
# - A unit test | ||
__merge__: ../../api/comp_method.yaml | ||
|
||
functionality: | ||
# A unique identifier for your component (required). | ||
# Can contain only lowercase letters or underscores. | ||
name: lb2 | ||
|
||
# Metadata for your component | ||
info: | ||
# A relatively short label, used when rendering visualisations (required) | ||
label: lb2 | ||
# A one sentence summary of how this method works (required). Used when | ||
# rendering summary tables. | ||
summary: "FILL IN: A one sentence summary of this method." | ||
# A multi-line description of how this component works (required). Used | ||
# when rendering reference documentation. | ||
description: | | ||
FILL IN: A (multi-line) description of how this method works. | ||
# Which normalisation method this component prefers to use (required). | ||
preferred_normalization: log_cp10k | ||
# A reference key from the bibtex library at src/common/library.bib (required). | ||
reference: bibtex_reference_key | ||
# URL to the documentation for this method (required). | ||
documentation_url: https://url.to/the/documentation | ||
# URL to the code repository for this method (required). | ||
repository_url: https://github.com/organisation/repository | ||
|
||
# Component-specific parameters (optional) | ||
# arguments: | ||
# - type: file | ||
# name: "--config" | ||
# default: "config_train.yaml" | ||
|
||
# Resources required to run the component | ||
resources: | ||
# The script of your component (required) | ||
- type: python_script | ||
path: script.py | ||
- path: models.py | ||
- path: utils.py | ||
- path: sample_submission.csv | ||
|
||
platforms: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 | ||
setup: | ||
- type: python | ||
packages: [anndata, | ||
pytorch==2.1.0, | ||
torchvision==0.16.0, | ||
torchaudio==2.1.0, | ||
fastparquet , | ||
pyarrow, | ||
pandas~=2.0.3, | ||
scikit-learn~=1.0.1, | ||
tqdm~=4.66.1, | ||
numpy~=1.23, | ||
matplotlib~=3.5.0, | ||
PyYAML~=6.0.1, | ||
lion-pytorch ] | ||
- type: nextflow | ||
directives: | ||
label: [ midtime, lowmem, midcpu ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
n_components_list: # targets dimension list | ||
- 18211 | ||
d_models_list: | ||
- 128 | ||
batch_size: 5 | ||
data_file: 'de_train.parquet' | ||
id_map_file: 'id_map.csv' | ||
device: cuda | ||
seed: null | ||
models_dir: 'trained_models' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
n_components_list: # targets dimension list | ||
- 18211 | ||
d_models_list: | ||
- 128 | ||
batch_size: 32 | ||
sampling_strategy: k-means # Choose either 'k-means' or 'random' | ||
data_file: "/gpfs/gibbs/pi/zhao/tl688/single_cell_pb/de_train.parquet" | ||
id_map_file: "/gpfs/gibbs/pi/zhao/tl688/single_cell_pb/id_map.csv" | ||
validation_percentage: 0.1 | ||
device: cuda | ||
seed: null | ||
num_epochs: 1 #20000 | ||
early_stopping: 5000 |
Oops, something went wrong.