Skip to content

Commit

Permalink
Merge pull request #305 from mims-harvard/moleculeace
Browse files Browse the repository at this point in the history
New MPC task and MoleculeACE datasets
  • Loading branch information
amva13 committed Aug 19, 2024
2 parents 0858120 + 17f035f commit 6fbf72f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 0 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies:
- dgl==1.1.3
- evaluate==0.4.2
- gget==0.28.4
- moleculeace==3.0.0
- pydantic==2.6.3
- gget==0.28.4
- pydantic==2.6.3
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets==2.20.0
evaluate==0.4.2
fuzzywuzzy>=0.18.0,<1.0
huggingface_hub>=0.20.3,<1.0
moleculeace==3.0.0
mygene>=3.2.2,<4.0.0
numpy>=1.26.4,<2.0.0
openpyxl>=3.0.10,<4.0.0
Expand Down
43 changes: 43 additions & 0 deletions tdc/single_pred/mpc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Molecular Property Cliff Task
# https://github.com/bidd-group/MPCD

# -*- coding: utf-8 -*-
# Author: TDC Team
# License: MIT

import pandas as pd
import sys
import warnings

warnings.filterwarnings("ignore")

from . import single_pred_dataset
from ..utils import print_sys, fuzzy_search, property_dataset_load
from ..metadata import dataset_names


class MPC(single_pred_dataset.DataLoader):
    """Data loader for the Molecular Property Cliff (MPC) task.

    Datasets are resolved by name through the MoleculeACE package, which is
    imported lazily inside :meth:`get_data`.
    """

    def __init__(self, name, path="./data"):
        """Remember the dataset name; nothing is loaded until requested.

        Args:
            name: Dataset identifier handed to ``MoleculeACE.Data``.
            path: Accepted for signature parity with other loaders; it is
                not used by this class.
        """
        # NOTE(review): the parent DataLoader initializer is not invoked
        # here -- presumably deliberate, since loading is delegated to
        # MoleculeACE. Confirm before adding a super().__init__ call.
        self.name = name
        self.data = None

    def get_data(self):
        """Load the dataset via MoleculeACE and featurize it as SMILES.

        Returns:
            The ``MoleculeACE.Data`` object, which is also stored on
            ``self.data``.

        Raises:
            Exception: If constructing or featurizing the dataset fails.
                The underlying error is attached as ``__cause__``.
        """
        from MoleculeACE import Data, Descriptors  #TODO: support non-MoleculeACE
        try:
            self.data = Data(self.name)
            self.data(Descriptors.SMILES)
        except Exception as err:
            # `except Exception` (not a bare `except`) lets KeyboardInterrupt
            # and SystemExit propagate; `from err` keeps the real failure
            # visible in the traceback instead of discarding it.
            raise Exception(
                "could not find dataset {}. For list of MoleculeAce datasets see https://github.com/bidd-group/MPCD/tree/main?tab=readme-ov-file#overview-of-the-mpc-benchmark-datasets"
                .format(self.name)) from err
        return self.data

    @staticmethod
    def _as_pandas(values):
        """Return *values* as a pandas object that ``pd.concat`` accepts."""
        if isinstance(values, (pd.Series, pd.DataFrame)):
            return values
        # pd.concat rejects plain lists/arrays with a TypeError, so wrap them.
        # NOTE(review): MoleculeACE appears to expose the splits as plain
        # Python lists -- confirm against the installed version.
        return pd.Series(list(values))

    def get_split(self):
        """Return the train/test partitions shipped with the dataset.

        Returns:
            dict: ``{"train": DataFrame, "test": DataFrame}``. Each frame
            holds the features and the labels side by side. When the
            underlying values are not pandas objects, the columns are
            numbered positionally (features first, labels second).
        """
        d = self.get_data()
        train = pd.concat(
            [self._as_pandas(d.x_train), self._as_pandas(d.y_train)], axis=1)
        test = pd.concat(
            [self._as_pandas(d.x_test), self._as_pandas(d.y_test)], axis=1)
        return {
            "train": train,
            "test": test,
        }

0 comments on commit 6fbf72f

Please sign in to comment.