forked from TrustAGI-Lab/CoLA
-
Notifications
You must be signed in to change notification settings - Fork 3
/
utils.py
118 lines (101 loc) · 4.33 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import networkx as nx
import scipy.sparse as sp
import torch
import scipy.io as sio
import random
import dgl
def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Turn a scipy sparse matrix (or a list of them) into ``(coords, values, shape)``.

    ``coords`` has one row per stored entry holding its (row, col) position,
    ``values`` is the matrix's data array and ``shape`` is its shape.

    With ``insert_batch=True`` a batch dimension is inserted: every coordinate
    row gets a leading 0 and the shape gets a leading 1.

    A list argument is converted element by element *in place* and that same
    list object is returned.
    """

    def _convert(matrix):
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        index_rows = [coo.row, coo.col]
        dims = coo.shape
        if insert_batch:
            # Every stored entry belongs to batch index 0.
            index_rows.insert(0, np.zeros(coo.row.shape[0]))
            dims = (1,) + dims
        return np.vstack(index_rows).transpose(), coo.data, dims

    if not isinstance(sparse_mx, list):
        return _convert(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _convert(matrix)
    return sparse_mx
def preprocess_features(features):
    """Row-normalize a sparse feature matrix and also return its tuple form.

    Every row is divided by its own sum. Rows whose sum is zero are left as
    all zeros instead of turning into inf/nan.

    Args:
        features: scipy sparse matrix; rows are normalized independently.

    Returns:
        A pair ``(dense, tuple_repr)``: the normalized matrix as returned by
        ``.todense()`` and the same matrix passed through ``sparse_to_tuple``.
    """
    rowsum = np.array(features.sum(1))
    # NumPy refuses to raise integer arrays to a negative integer power, so
    # integer-valued (or boolean) features are promoted to float first.
    # Floating-point inputs keep their dtype.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Zero rows give inf here; that is expected and zeroed just below, so the
    # divide-by-zero warning is silenced rather than shown on every call.
    with np.errstate(divide="ignore"):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features.todense(), sparse_to_tuple(features)
def normalize_adj(adj):
    """Apply symmetric degree normalization to an adjacency matrix.

    Row sums are used as degrees. Each entry ends up multiplied by the
    inverse square roots of the two degrees involved; rows that sum to zero
    get a scaling factor of 0. The result is returned in COO format.
    """
    coo_adj = sp.coo_matrix(adj)
    degrees = np.array(coo_adj.sum(1))
    inv_sqrt_deg = np.power(degrees, -0.5).flatten()
    # Zero-degree rows produce inf; replace those factors with 0.
    inv_sqrt_deg[np.isinf(inv_sqrt_deg)] = 0.
    scaling = sp.diags(inv_sqrt_deg)
    column_scaled = coo_adj.dot(scaling)
    return column_scaled.transpose().dot(scaling).tocoo()
def dense_to_one_hot(labels_dense, num_classes):
    """Expand an array of scalar class ids into a one-hot matrix.

    The result has ``labels_dense.shape[0]`` rows and ``num_classes`` columns,
    filled with zeros except for a 1 at each row's class position.
    """
    row_count = labels_dense.shape[0]
    one_hot = np.zeros((row_count, num_classes))
    # Position of each row's first cell in the flattened output.
    row_starts = np.arange(row_count) * num_classes
    flat_positions = row_starts + labels_dense.ravel()
    one_hot.flat[flat_positions] = 1
    return one_hot
def load_mat(dataset, train_rate=0.3, val_rate=0.1):
    """Read ``./dataset/<dataset>.mat`` and split its nodes into train/val/test.

    The anomaly label, attribute and network arrays are looked up under
    ``Label`` / ``Attributes`` / ``Network`` first and under ``gnd`` / ``X`` /
    ``A`` otherwise. Class ids are read from ``Class``, shifted down by one
    and one-hot encoded. The structural and attribute anomaly labels are
    ``None`` when the file has no ``str_anomaly_label`` entry.

    Node indices are shuffled with the global ``random`` module, so the split
    depends on that module's current state.

    Returns:
        ``(adj, feat, labels, idx_train, idx_val, idx_test, ano_labels,
        str_ano_labels, attr_ano_labels)``.
    """
    data = sio.loadmat("./dataset/{}.mat".format(dataset))

    def _first_present(preferred, fallback):
        # Only touch the fallback key when the preferred one is missing.
        return data[preferred] if preferred in data else data[fallback]

    raw_ano = _first_present('Label', 'gnd')
    raw_attr = _first_present('Attributes', 'X')
    raw_net = _first_present('Network', 'A')

    adj = sp.csr_matrix(raw_net)
    feat = sp.lil_matrix(raw_attr)

    # Stored class ids are shifted down by one before one-hot encoding.
    class_ids = np.squeeze(np.array(data['Class'], dtype=np.int64) - 1)
    labels = dense_to_one_hot(class_ids, np.max(class_ids) + 1)

    ano_labels = np.squeeze(np.array(raw_ano))
    str_ano_labels = None
    attr_ano_labels = None
    if 'str_anomaly_label' in data:
        str_ano_labels = np.squeeze(np.array(data['str_anomaly_label']))
        attr_ano_labels = np.squeeze(np.array(data['attr_anomaly_label']))

    num_node = adj.shape[0]
    train_end = int(num_node * train_rate)
    val_end = train_end + int(num_node * val_rate)

    shuffled = list(range(num_node))
    random.shuffle(shuffled)

    idx_train = shuffled[:train_end]
    idx_val = shuffled[train_end:val_end]
    idx_test = shuffled[val_end:]
    return (adj, feat, labels, idx_train, idx_val, idx_test,
            ano_labels, str_ano_labels, attr_ano_labels)
def adj_to_dgl_graph(adj):
    """Build a DGL graph from a scipy sparse adjacency matrix via networkx."""
    return dgl.DGLGraph(nx.from_scipy_sparse_matrix(adj))
def generate_rwr_subgraph(dgl_graph, subgraph_size):
    """Sample one node list per graph node using random walk with restart.

    For every node ``i`` the returned list holds ``subgraph_size - 1`` node
    ids gathered from the walk(s) seeded at ``i``, followed by ``i`` itself
    as the final element.
    """
    target_len = subgraph_size - 1
    seeds = list(range(dgl_graph.number_of_nodes()))
    rwr = dgl.contrib.sampling.random_walk_with_restart
    first_pass = rwr(dgl_graph, seeds, restart_prob=1, max_nodes_per_seed=subgraph_size*3)

    subv = []
    for node, trace in enumerate(first_pass):
        picked = torch.unique(torch.cat(trace), sorted=False).tolist()
        attempts = 0
        # Too few distinct nodes: walk again from this node alone with a
        # lower restart probability and a larger node budget.
        while len(picked) < target_len:
            retry = rwr(dgl_graph, [node], restart_prob=0.9, max_nodes_per_seed=subgraph_size*5)
            picked = torch.unique(torch.cat(retry[0]), sorted=False).tolist()
            attempts += 1
            if attempts > 10 and len(picked) <= 2:
                # After more than 10 retries with at most 2 nodes found,
                # repeat what was found so the length requirement can be met.
                picked = picked * target_len
        picked = picked[:target_len]
        picked.append(node)
        subv.append(picked)
    return subv