-
Notifications
You must be signed in to change notification settings - Fork 57
/
data_utils.py
67 lines (61 loc) · 2.15 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from random import randint
from collections import defaultdict
import torch
def heads_tails(n_ent, train_data, valid_data=None, test_data=None):
    """Collect every known head and tail entity for each query as sparse vectors.

    Each data split is a ``(src, rel, dst)`` triple of equally long sequences
    describing the triples ``(src[i], rel[i], dst[i])``.

    Args:
        n_ent: Length of every returned indicator vector.
        train_data: ``(src, rel, dst)`` sequences; always used.
        valid_data: Optional ``(src, rel, dst)`` sequences; skipped when falsy.
        test_data: Optional ``(src, rel, dst)`` sequences; skipped when falsy.

    Returns:
        A ``(heads_sp, tails_sp)`` pair of dicts:

        * ``tails_sp[(s, r)]`` has ones at every ``t`` seen in a triple
          ``(s, r, t)``.
        * ``heads_sp[(t, r)]`` has ones at every ``s`` seen in a triple
          ``(s, r, t)``.

        Every value is a 1-D float32 sparse COO tensor of size ``n_ent``.
    """
    heads = defaultdict(set)
    tails = defaultdict(set)

    # Walk the splits one by one instead of concatenating them with ``+`` so
    # that tuples (or any other iterables) work as well as lists.
    splits = [train_data]
    splits.extend(split for split in (valid_data, test_data) if split)
    for src, rel, dst in splits:
        for s, r, t in zip(src, rel, dst):
            tails[(s, r)].add(t)
            heads[(t, r)].add(s)

    def to_indicator(members):
        # Sorting makes the stored index order independent of set iteration
        # order; the dense view of the tensor is unaffected.
        index = torch.tensor([sorted(members)], dtype=torch.long)
        values = torch.ones(len(members), dtype=torch.float32)
        # torch.sparse_coo_tensor replaces the deprecated
        # torch.sparse.FloatTensor(indices, values, size) constructor.
        return torch.sparse_coo_tensor(index, values, (n_ent,))

    heads_sp = {key: to_indicator(members) for key, members in heads.items()}
    tails_sp = {key: to_indicator(members) for key, members in tails.items()}
    return heads_sp, tails_sp
def inplace_shuffle(*lists):
    """Shuffle several parallel lists in place with one shared permutation.

    A swap partner ``randint(0, i)`` is drawn once for every position ``i`` of
    the first list. The same sequence of swaps is then replayed on each list,
    so elements that shared an index before the call still share one after.

    Args:
        *lists: Mutable sequences to shuffle; the first one determines how
            many swap partners are drawn.

    Returns:
        None. The sequences are modified in place.
    """
    n_items = len(lists[0])
    swap_with = [randint(0, pos) for pos in range(n_items)]
    for seq in lists:
        for pos in range(len(seq)):
            other = swap_with[pos]
            seq[pos], seq[other] = seq[other], seq[pos]
def batch_by_num(n_batch, *lists, n_sample=None):
    """Split parallel sequences into ``n_batch`` consecutive, near-equal batches.

    Batch ``i`` covers the index range
    ``[int(n_sample * i / n_batch), int(n_sample * (i + 1) / n_batch))``.

    Args:
        n_batch: Number of batches to produce.
        *lists: Sequences to slice in lockstep.
        n_sample: Number of leading items to cover; defaults to the length of
            the first sequence.

    Yields:
        For a single input sequence, its slice for the current batch; for
        several sequences, a list holding one slice per sequence.
    """
    total = len(lists[0]) if n_sample is None else n_sample
    for step in range(n_batch):
        lo = int(total * step / n_batch)
        hi = int(total * (step + 1) / n_batch)
        chunks = [seq[lo:hi] for seq in lists]
        # A lone sequence is yielded bare rather than wrapped in a list.
        yield chunks if len(chunks) > 1 else chunks[0]
def batch_by_size(batch_size, *lists, n_sample=None):
    """Yield consecutive batches of at most ``batch_size`` items.

    Args:
        batch_size: Maximum number of items per batch; must be positive.
        *lists: Sequences to slice in lockstep.
        n_sample: Number of leading items to cover; defaults to the length of
            the first sequence.

    Yields:
        For a single input sequence, its slice for the current batch; for
        several sequences, a list holding one slice per sequence. The final
        batch may be shorter than ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A non-positive step would never advance past n_sample and the generator
    # would produce batches forever.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if n_sample is None:
        n_sample = len(lists[0])
    for head in range(0, n_sample, batch_size):
        tail = min(n_sample, head + batch_size)
        ret = [ls[head:tail] for ls in lists]
        # A lone sequence is yielded bare rather than wrapped in a list.
        yield ret if len(ret) > 1 else ret[0]