# neat.yaml — 161 lines (156 loc), 4.5 KB
# Repository generated from Knowledge-Graph-Hub/kg-example.
---
# name: "kg-idg"
# description: "KG-IDG"
# Target section: a single setting, target_path.
# NOTE(review): presumably the directory the pipeline writes its outputs to —
# confirm against the consuming tool.
Target:
  target_path: graph_ml
# Upload section: S3 bucket name, the directory (key prefix) inside that
# bucket, and extra arguments for the upload.
# NOTE(review): extra_args is presumably forwarded to the S3 client as-is;
# "public-read" is an S3 canned ACL — confirm public exposure is intended.
Upload:
  s3_bucket: kg-hub-public-data
  s3_bucket_dir: kg-idg/
  extra_args:
    "ACL": "public-read"
# Graph data section: where the graph archive comes from, how the node and
# edge TSV files are read, and which edge files are used for evaluation.
GraphDataConfiguration:
  source_data:
    files:
      # THIS-BUILD-ID looks like a placeholder substituted at build time —
      # TODO confirm; it is kept verbatim here.
      - path: https://kg-hub.berkeleybop.io/kg-idg/THIS-BUILD-ID/KG-IDG.tar.gz
        desc: "Location of KG-IDG nodefile, edgefile, and validation subgraphs."
  graph:
    directed: false
    node_path: merged-kg_nodes.tsv
    edge_path: merged-kg_edges.tsv
    verbose: true
    # Column names in the node file.
    nodes_column: 'id'
    node_list_node_types_column: 'category'
    default_node_type: 'biolink:NamedThing'
    # Column names in the edge file.
    sources_column: 'subject'
    destinations_column: 'object'
    default_edge_type: 'biolink:related_to'
  evaluation_data:
    # Positive and negative edge files for validation.
    valid_data:
      pos_edge_filepath: pos_valid_edges.tsv
      neg_edge_filepath: neg_valid_edges.tsv
    # Only a negative edge file is listed for training.
    train_data:
      neg_edge_filepath: neg_train_edges.tsv
# Embeddings section: output file names and the parameters of the node
# embedding method (SkipGram).
EmbeddingsConfig:
  filename: KG-IDG-SkipGram.tsv
  history_filename: embedding_history.json
  node_embedding_params:
    node_embedding_method_name: SkipGram
    use_mirrored_strategy: false
    walk_length: 100
    batch_size: 128
    window_size: 4
    return_weight: 1.0
    explore_weight: 1.0
    iterations: 20
  # NOTE(review): placed at section level next to the other output file names;
  # the original nesting was lost, so confirm this key does not belong under
  # node_embedding_params.
  tsne_file_name: tsne.png
# Classifier section: three edge classifiers — a Keras Sequential network
# (mlp_0), a random forest (rf_0) and a logistic regression (lr_0). Each entry
# gives an id, a display name, the class to build, the edge method, the output
# file, and the parameters for that class.
ClassifierContainer:
  classifiers:
    - classifier_id: mlp_0
      classifier_name: neural network
      classifier_type: tensorflow.keras.models.Sequential
      edge_method: Average
      outfile: "model_mlp_kg-idg.model"
      history_filename: "model_mlp_kg-idg_history.json"
      parameters:
        tf_keras_params:
          layers_config:
            layers:
              # NOTE(review): the inline comment below refers to an
              # embedding_size key, but EmbeddingsConfig in this file sets no
              # such key; 100 presumably matches the embedding method's
              # default size — confirm.
              - type: tensorflow.keras.layers.Input
                parameters:
                  shape: 100  # must match embedding_size up above
              - type: tensorflow.keras.layers.Dense
                parameters:
                  units: 128
                  activation: relu
              - type: tensorflow.keras.layers.Dense
                parameters:
                  units: 32
                  activation: relu
              - type: tensorflow.keras.layers.Dropout
                parameters:
                  rate: 0.5
              - type: tensorflow.keras.layers.Dense
                parameters:
                  units: 16
                  activation: relu
              # One sigmoid unit, paired with the binary_crossentropy loss.
              - type: tensorflow.keras.layers.Dense
                parameters:
                  units: 1
                  activation: sigmoid
          loss: binary_crossentropy
          optimizer: nadam
          metrics_config:
            metrics:
              - name: auprc
                type: tensorflow.keras.metrics.AUC
                curve: PR
              - name: auroc
                type: tensorflow.keras.metrics.AUC
                curve: ROC
              - name: Recall
                type: tensorflow.keras.metrics.Recall
              - name: Precision
                type: tensorflow.keras.metrics.Precision
              # This entry has a type but no name, unlike the others.
              - type: accuracy
          fit_config:
            batch_size: 4096
            epochs: 10
            callbacks_list:
              callbacks:
                - type: tensorflow.keras.callbacks.EarlyStopping
                  monitor: val_loss
                  patience: 5
                  min_delta: 0.001
                # No parameters are given for this callback.
                - type: tensorflow.keras.callbacks.ReduceLROnPlateau
    - classifier_id: rf_0
      classifier_name: Random Forest
      classifier_type: sklearn.ensemble.RandomForestClassifier
      edge_method: Average
      outfile: "model_randomforest_kg-idg.model"
      parameters:
        sklearn_params:
          n_estimators: 500
          max_depth: 30
          n_jobs: 8
          random_state: 42
    - classifier_id: lr_0
      classifier_name: Logistic Regression
      classifier_type: sklearn.linear_model.LogisticRegression
      edge_method: Average
      outfile: "model_lr_kg-idg.model"
      parameters:
        sklearn_params:
          random_state: 42
          max_iter: 2000
# Model application section: one entry per trained classifier. model_id
# matches a classifier_id from ClassifierContainer (mlp_0, rf_0, lr_0). Each
# entry sets a cutoff, an output file, and the node types allowed as source
# and destination.
# NOTE(review): cutoff is presumably the minimum prediction score to report —
# confirm against the consuming tool.
ApplyTrainedModelsContainer:
  models:
    - model_id: mlp_0
      cutoff: 0.8
      outfile: mlp_classifier_predictions_kgx.tsv
      node_types:
        source:
          - 'biolink:Drug'
          - 'biolink:Protein'
        destination:
          - 'biolink:Drug'
          - 'biolink:Protein'
    - model_id: rf_0
      cutoff: 0.8
      outfile: rf_classifier_predictions_kgx.tsv
      node_types:
        source:
          - 'biolink:Drug'
          - 'biolink:Protein'
        destination:
          - 'biolink:Drug'
          - 'biolink:Protein'
    - model_id: lr_0
      cutoff: 0.8
      outfile: lr_classifier_predictions_kgx.tsv
      node_types:
        source:
          - 'biolink:Drug'
          - 'biolink:Protein'
        destination:
          - 'biolink:Drug'
          - 'biolink:Protein'