Skip to content

Commit

Permalink
Merge pull request #30 from yhr91/CRISPROutcome
Browse files Browse the repository at this point in the history
Added CRISPR DNA repair outcomes dataset
  • Loading branch information
kexinhuang12345 authored Dec 30, 2020
2 parents a87f4ed + 00ec22c commit 322bddc
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
6 changes: 5 additions & 1 deletion tdc/label_name_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,13 @@
# Label names grouped under `drugcomb_targets`: one CSS entry followed by
# four Synergy_* entries.
drugcomb_targets = [
    'CSS',
    'Synergy_ZIP',
    'Synergy_Bliss',
    'Synergy_Loewe',
    'Synergy_HSA',
]

# Label names available for the 'leenay' dataset (it is the list that
# `dataset2target_lists` maps 'leenay' to).
leenay_targets = [
    'Fraction_Insertions',
    'Avg_Insertion_Length',
    'Avg_Deletion_Length',
    'Indel_Diversity',
    'Fraction_Frameshifts',
]

# Lookup from a dataset name to the list of label names defined for it.
# Each value is one of the `*_targets` lists declared earlier in this module.
dataset2target_lists = {
    'qm7b': QM7_targets,
    'qm8': QM8_targets,
    'qm9': QM9_targets,
    'tap': TAP_targets,
    'toxcast': ToxCast_targets,
    'tox21': Tox21_targets,
    'leenay': leenay_targets,
}
16 changes: 10 additions & 6 deletions tdc/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@

# Per-task lists of lower-case dataset identifiers.
gda_dataset_names = ["disgenet"]

# Exposed under the "CRISPROutcome" key of the task-to-datasets mapping.
crisproutcome_dataset_names = ["leenay"]

drugres_dataset_names = ["gdsc1", "gdsc2"]

drugsyn_dataset_names = ["oncopolypharmacology", "drugcomb_nci60"]
Expand Down Expand Up @@ -222,7 +224,8 @@
"Develop",
"QM",
"Paratope",
"Yields"],
"Yields",
"CRISPROutcome"],
'multi_pred': ["DTI",
"PPI",
"DDI",
Expand Down Expand Up @@ -270,7 +273,8 @@ def get_task2category():
"Yields": yield_dataset_names,
"Catalyst": catalyst_dataset_names,
"CompoundLibrary": compound_library_names,
"BioKG": biokg_library_names
"BioKG": biokg_library_names,
"CRISPROutcome": crisproutcome_dataset_names
}

# Benchmark group name -> the `admet_benchmark` object (defined outside this
# excerpt).
benchmark_names = {
    "admet_group": admet_benchmark,
}
Expand Down Expand Up @@ -359,8 +363,8 @@ def get_task2category():
'clearance_hepatocyte_az': 'tab',
'half_life_obach': 'tab',
'ld50_zhu': 'tab',
'vdss_lombardo': 'tab'
}
'vdss_lombardo': 'tab',
'leenay':'tab'}

name2id = {'bbb_adenot': 4259565,
'bbb_martins': 4259566,
Expand Down Expand Up @@ -439,8 +443,8 @@ def get_task2category():
'clearance_hepatocyte_az': 4266187,
'ld50_zhu': 4267146,
'half_life_obach': 4266799,
'vdss_lombardo': 4267387
}
'vdss_lombardo': 4267387,
'leenay':4279966 }

oracle2type = {'drd2': 'pkl',
'jnk3': 'pkl',
Expand Down
26 changes: 26 additions & 0 deletions tdc/single_pred/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,29 @@ def __init__(self, name, path='./data', label_name=None, print_stats=False):
if print_stats:
self.print_stats()
print('Done!', flush = True, file = sys.stderr)

class CRISPROutcome(single_pred_dataset.DataLoader):
    """Data loader for DNA repair outcomes following a CRISPR experiment.

    All constructor arguments are forwarded to
    ``single_pred_dataset.DataLoader``, together with the dataset names
    registered under ``dataset_names["CRISPROutcome"]``.

    Parameters
    ----------
    name : str
        Name of the dataset to load; passed through to the parent loader.
    path : str, optional (default="./data")
        Passed through to the parent loader (presumably the directory the
        data is stored in -- confirm against the parent class).
    label_name : str, optional (default=None)
        Passed through to the parent loader (presumably selects which label
        to use -- confirm against the parent class).
    print_stats : bool, optional (default=False)
        Passed through to the parent loader; when true, ``print_stats()`` is
        additionally called on this instance after initialisation.
    """

    def __init__(self, name, path='./data', label_name=None, print_stats=False):
        crispr_dataset_names = dataset_names["CRISPROutcome"]
        super().__init__(name, path, label_name, print_stats,
                         dataset_names=crispr_dataset_names)

        # The single input entity for this task is labelled 'GuideSeq'.
        self.entity1_name = 'GuideSeq'

        if print_stats:
            self.print_stats()

        # Completion notice goes to stderr, flushed immediately.
        print('Done!', file=sys.stderr, flush=True)

0 comments on commit 322bddc

Please sign in to comment.