Merge remote-tracking branch 'origin/master'

aimat-lab · Sep 28, 2023 · aaf2a01 · aaf2a01
1 parent cf30c10
commit aaf2a01
Show file tree

Hide file tree

Showing 15 changed files with 664 additions and 83 deletions.
diff --git a/kgcnn/backend/_tensorflow.py b/kgcnn/backend/_tensorflow.py
@@ -21,7 +21,7 @@ def scatter_reduce_max(indices, values, shape):
 def scatter_reduce_mean(indices, values, shape):
     indices = tf.expand_dims(indices, axis=-1)
     counts = tf.scatter_nd(indices, tf.ones_like(values), shape)
-    return tf.scatter_nd(indices, values, shape)/counts
+    return tf.math.divide_no_nan(tf.scatter_nd(indices, values, shape), counts)
 
 
 def scatter_reduce_softmax(indices, values, shape, normalize: bool = False):

diff --git a/kgcnn/literature/GIN/_make.py b/kgcnn/literature/GIN/_make.py
@@ -108,7 +108,7 @@ def make_model(inputs: list = None,
 
     # Wrapping disjoint model.
     out = model_disjoint(
-        inputs,
+        [n, disjoint_indices, batch_id_node, count_nodes],
         use_node_embedding=len(inputs[0]['shape']) < 2, input_node_embedding=input_node_embedding,
         depth=depth, gin_args=gin_args, gin_mlp=gin_mlp, last_mlp=last_mlp, dropout=dropout,
         output_embedding=output_embedding, output_mlp=output_mlp

diff --git a/kgcnn/literature/GraphSAGE/_make.py b/kgcnn/literature/GraphSAGE/_make.py
@@ -30,14 +30,14 @@
 model_default = {
     'name': "GraphSAGE",
     'inputs': [
-        {"shape": (None, ), "name": "node_attributes", "dtype": "float32"},
-        {"shape": (None, ), "name": "edge_attributes", "dtype": "float32"},
+        {"shape": (None,), "name": "node_attributes", "dtype": "float32"},
+        {"shape": (None,), "name": "edge_attributes", "dtype": "float32"},
         {"shape": (None, 2), "name": "edge_indices", "dtype": "int64"},
         {"shape": (), "name": "total_nodes", "dtype": "int64"},
         {"shape": (), "name": "total_edges", "dtype": "int64"}
     ],
     "cast_disjoint_kwargs": {},
-    "input_node_embedding":  {"input_dim": 95, "output_dim": 64},
+    "input_node_embedding": {"input_dim": 95, "output_dim": 64},
     "input_edge_embedding": {"input_dim": 5, "output_dim": 64},
     'node_mlp_args': {"units": [100, 50], "use_bias": True, "activation": ['relu', "linear"]},
     'edge_mlp_args': {"units": [100, 50], "use_bias": True, "activation": ['relu', "linear"]},
@@ -121,48 +121,25 @@ def make_model(inputs: list = None,
         **cast_disjoint_kwargs)([batched_nodes, batched_indices, total_nodes, total_edges])
     ed, _, _, _ = CastBatchedAttributesToDisjoint(**cast_disjoint_kwargs)([batched_edges, total_edges])
 
-    # Embedding, if no feature dimension
-    if len(inputs[0]['shape']) < 2:
-        n = Embedding(**input_node_embedding)(n)
-    if len(inputs[1]['shape']) < 2:
-        ed = Embedding(**input_edge_embedding)(ed)
-
-    for i in range(0, depth):
-
-        eu = GatherNodesOutgoing(**gather_args)([n, disjoint_indices])
-        if use_edge_features:
-            eu = Concatenate(**concat_args)([eu, ed])
-
-        eu = GraphMLP(**edge_mlp_args)([eu, batch_id_edge, count_edges])
-
-        # Pool message
-        if pooling_args['pooling_method'] in ["LSTM", "lstm"]:
-            nu = AggregateLocalEdgesLSTM(**pooling_args)([n, eu, disjoint_indices])
-        else:
-            nu = AggregateLocalEdges(**pooling_args)([n, eu, disjoint_indices])  # Summing for each node connection
-
-        nu = Concatenate(**concat_args)([n, nu])  # Concatenate node features with new edge updates
-
-        n = GraphMLP(**node_mlp_args)([nu, batch_id_node, count_nodes])
-
-        n = GraphLayerNormalization()([n, batch_id_node, count_nodes])
+    out = model_disjoint(
+        [n, ed, disjoint_indices, batch_id_node, batch_id_edge, count_nodes, count_edges],
+        use_node_embedding=len(inputs[0]['shape']) < 2, use_edge_embedding=len(inputs[1]['shape']) < 2,
+        input_node_embedding=input_node_embedding, input_edge_embedding=input_edge_embedding,
+        node_mlp_args=node_mlp_args, edge_mlp_args=edge_mlp_args, pooling_args=pooling_args,
+        pooling_nodes_args=pooling_nodes_args, gather_args=gather_args, concat_args=concat_args,
+        use_edge_features=use_edge_features, depth=depth, output_embedding=output_embedding,
+        output_mlp=output_mlp,
+    )
 
     # Regression layer on output
     if output_embedding == 'graph':
-        out = PoolingNodes(**pooling_nodes_args)([count_nodes, n, batch_id_node])
-        out = MLP(**output_mlp)(out)
         out = CastDisjointToGraphState(**cast_disjoint_kwargs)(out)
-
     elif output_embedding == 'node':
-        out = GraphMLP(**output_mlp)([n, batch_id_node, count_nodes])
         if output_to_tensor:
             out = CastDisjointToBatchedAttributes(**cast_disjoint_kwargs)([batched_nodes, out, batch_id_node, node_id])
         else:
             out = CastDisjointToGraphState(**cast_disjoint_kwargs)(out)
 
-    else:
-        raise ValueError("Unsupported output embedding for `GraphSAGE`")
-
     if output_scaling is not None:
         scaler = get_scaler(output_scaling["name"])(**output_scaling)
         out = scaler(out)
@@ -173,6 +150,7 @@ def make_model(inputs: list = None,
     if output_scaling is not None:
         def set_scale(*args, **kwargs):
             scaler.set_scale(*args, **kwargs)
+
         setattr(model, "set_scale", set_scale)
 
     return model
@@ -194,7 +172,7 @@ def model_disjoint(
         depth: int = None,
         output_embedding: str = None,
         output_mlp: dict = None,
-    ):
+):
     n, ed, disjoint_indices, batch_id_node, batch_id_edge, count_nodes, count_edges = inputs
 
     # Embedding, if no feature dimension
@@ -231,4 +209,4 @@ def model_disjoint(
         out = GraphMLP(**output_mlp)([n, batch_id_node, count_nodes])
     else:
         raise ValueError("Unsupported output embedding for `GraphSAGE`")
-    return out
+    return out
diff --git a/training/hyper/hyper_clintox.py b/training/hyper/hyper_clintox.py
@@ -41,7 +41,6 @@
                 "loss": "binary_crossentropy",
                 "metrics": ["binary_accuracy", {"class_name": "AUC", "config": {"name": "auc"}}]
             },
-            "multi_target_indices": None
         },
         "dataset": {
             "class_name": "ClinToxDataset",
@@ -393,4 +392,78 @@
             "kgcnn_version": "4.0.0"
         }
     },
+    "DMPNN": {
+        "model": {
+            "class_name": "make_model",
+            "module_name": "kgcnn.literature.DMPNN",
+            "config": {
+                "name": "DMPNN",
+                "inputs": [
+                    {"shape": (None, 41), "name": "node_attributes", "dtype": "float32"},
+                    {"shape": (None, 11), "name": "edge_attributes", "dtype": "float32"},
+                    {"shape": (None, 2), "name": "edge_indices", "dtype": "int64"},
+                    {"shape": (None, 1), "name": "edge_indices_reverse", "dtype": "int64"},
+                    {"shape": (), "name": "total_nodes", "dtype": "int64"},
+                    {"shape": (), "name": "total_edges", "dtype": "int64"}
+                ],
+                "cast_disjoint_kwargs": {},
+                "input_node_embedding": {"input_dim": 95, "output_dim": 64},
+                "input_edge_embedding": {"input_dim": 5, "output_dim": 64},
+                "input_graph_embedding": {"input_dim": 100, "output_dim": 64},
+                "pooling_args": {"pooling_method": "scatter_sum"},
+                "edge_initialize": {"units": 128, "use_bias": True, "activation": "relu"},
+                "edge_dense": {"units": 128, "use_bias": True, "activation": "linear"},
+                "edge_activation": {"activation": "relu"},
+                "node_dense": {"units": 128, "use_bias": True, "activation": "relu"},
+                "verbose": 10, "depth": 5,
+                "dropout": {"rate": 0.1},
+                "output_embedding": "graph",
+                "output_mlp": {
+                    "use_bias": [True, True, False], "units": [64, 32, 1],
+                    "activation": ["relu", "relu", "sigmoid"]
+                }
+            }
+        },
+        "training": {
+            "fit": {"batch_size": 32, "epochs": 50, "validation_freq": 1, "verbose": 2, "callbacks": []},
+            "compile": {
+                "optimizer": {
+                    "class_name": "Adam",
+                    "config": {
+                        "learning_rate":
+                            {"module": "keras_core.optimizers.schedules",
+                             "class_name": "ExponentialDecay",
+                             "config": {"initial_learning_rate": 0.001,
+                                        "decay_steps": 1600,
+                                        "decay_rate": 0.5, "staircase": False}}
+                    }
+                },
+                # "loss": "kgcnn>BinaryCrossentropyNoNaN",
+                # "metrics": ["kgcnn>BinaryAccuracyNoNaN",
+                #             {"class_name": "kgcnn>AUCNoNaN", "config": {"multi_label": True, "num_labels": 12}}],
+                # "metrics": ["kgcnn>BinaryAccuracyNoNaN", "kgcnn>AUCNoNaN"],
+                "loss": "binary_crossentropy",
+                "metrics": ["binary_accuracy", {"class_name": "AUC", "config": {"name": "auc"}}]
+            }
+        },
+        "dataset": {
+            "class_name": "ClinToxDataset",
+            "module_name": "kgcnn.data.datasets.ClinToxDataset",
+            "config": {},
+            "methods": [
+                {"set_attributes": {}},
+                {"set_train_test_indices_k_fold": {"n_splits": 5, "random_state": 42, "shuffle": True}},
+                {"map_list": {"method": "set_edge_indices_reverse"}},
+                {"map_list": {"method": "count_nodes_and_edges"}},
+            ]
+        },
+        "data": {
+            "data_unit": "mol/L"
+        },
+        "info": {
+            "postfix": "",
+            "postfix_file": "",
+            "kgcnn_version": "4.0.0"
+        }
+    },
 }
diff --git a/training/hyper/hyper_cora_lu.py b/training/hyper/hyper_cora_lu.py
@@ -328,4 +328,78 @@
             "kgcnn_version": "4.0.0"
         }
     },
+    "DMPNN": {
+        "model": {
+            "class_name": "make_model",
+            "module_name": "kgcnn.literature.DMPNN",
+            "config": {
+                "name": "DMPNN",
+                "inputs": [
+                    {"shape": [None, 1433], "name": "node_attributes", "dtype": "float32"},
+                    {"shape": [None, 1], "name": "edge_weights", "dtype": "float32"},
+                    {"shape": [None, 2], "name": "edge_indices", "dtype": "int64"},
+                    {"shape": (None, 1), "name": "edge_indices_reverse", "dtype": "int64"},
+                    {"shape": (), "name": "total_nodes", "dtype": "int64"},
+                    {"shape": (), "name": "total_edges", "dtype": "int64"}
+                ],
+                "cast_disjoint_kwargs": {},
+                "input_node_embedding": {"input_dim": 95, "output_dim": 64},
+                "input_edge_embedding": {"input_dim": 5, "output_dim": 64},
+                "input_graph_embedding": {"input_dim": 100, "output_dim": 64},
+                "pooling_args": {"pooling_method": "scatter_sum"},
+                "edge_initialize": {"units": 128, "use_bias": True, "activation": "relu"},
+                "edge_dense": {"units": 128, "use_bias": True, "activation": "linear"},
+                "edge_activation": {"activation": "relu"},
+                "node_dense": {"units": 128, "use_bias": True, "activation": "relu"},
+                "verbose": 10, "depth": 5,
+                "dropout": {"rate": 0.1},
+                "output_embedding": "node",
+                "output_mlp": {
+                    "use_bias": [True, True, False], "units": [64, 32, 7],
+                    "activation": ["relu", "relu", "softmax"]
+                }
+            }
+        },
+        "training": {
+            "cross_validation": {"class_name": "KFold",
+                                 "config": {"n_splits": 5, "random_state": 42, "shuffle": True}},
+            "multi_target_indices": None,
+            "fit": {"batch_size": 32, "epochs": 300, "validation_freq": 1, "verbose": 2, "callbacks": []},
+            "compile": {
+                "optimizer": {
+                    "class_name": "Adam",
+                    "config": {
+                        "learning_rate":
+                            {"module": "keras_core.optimizers.schedules",
+                             "class_name": "ExponentialDecay",
+                             "config": {"initial_learning_rate": 0.001,
+                                        "decay_steps": 1600,
+                                        "decay_rate": 0.5, "staircase": False}}
+                    }
+                },
+                "loss": "categorical_crossentropy",
+                "weighted_metrics": ["categorical_accuracy", {"class_name": "AUC", "config": {"name": "auc"}}]
+            },
+        },
+        "dataset": {
+            "class_name": "CoraLuDataset",
+            "module_name": "kgcnn.data.datasets.CoraLuDataset",
+            "config": {},
+            "methods": [
+                {"map_list": {"method": "make_undirected_edges"}},
+                {"map_list": {"method": "add_edge_self_loops"}},
+                {"map_list": {"method": "normalize_edge_weights_sym"}},
+                {"map_list": {"method": "set_edge_indices_reverse"}},
+                {"map_list": {"method": "count_nodes_and_edges"}},
+            ]
+        },
+        "data": {
+            "data_unit": ""
+        },
+        "info": {
+            "postfix": "",
+            "postfix_file": "",
+            "kgcnn_version": "4.0.0"
+        }
+    },
 }