-
Notifications
You must be signed in to change notification settings - Fork 3
/
generate_constraints_link.py
50 lines (43 loc) · 1.61 KB
/
generate_constraints_link.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import json
from sklearn.datasets import load_digits, load_iris, load_diabetes
# Link-array size constant; it is not referenced elsewhere in the visible
# code -- presumably meant to be passed to generate() (TODO confirm).
LINK_ARRAY_SIZE = 20

# (output name, loaded dataset) pairs that generate() iterates over; the
# name is handed to np.save as the output file name. Un-comment an entry to
# process that dataset as well.
datasets = [
    # ("iris", load_iris()),
    # ("digits", load_digits()),
    ("diabetes", load_diabetes()),
]
def _first_partner(labels, sample, same_label):
    """Return the lowest index whose label matches/differs from ``sample``'s.

    With ``same_label=True`` the partner must carry an equal label and must
    not be ``sample`` itself; with ``same_label=False`` it must carry a
    different label. Returns ``None`` when no such index exists.
    """
    if same_label:
        mask = labels == labels[sample]
        # A sample is never linked with itself (mask is a fresh array, so
        # mutating it does not touch the caller's labels).
        mask[sample] = False
    else:
        mask = labels != labels[sample]
    candidates = np.flatnonzero(mask)
    return candidates[0] if candidates.size else None


def _collect_links(features, labels, link_array_size, same_label):
    """Draw random anchor samples and pair each with its first valid partner."""
    links = []
    # np.random.choice draws with replacement, so anchors may repeat.
    for sample in np.random.choice(len(features), link_array_size):
        partner = _first_partner(labels, sample, same_label)
        if partner is None:
            # No sample satisfies the constraint for this anchor; skip it.
            continue
        links.append([
            np.asarray(features[sample]),
            np.asarray(features[partner]),
        ])
    return links


def generate(link_array_size, data_sets=None):
    """Build must-link / cannot-link constraint pairs and save them to disk.

    For every ``(name, data_set)`` pair, ``link_array_size`` anchor indices
    are drawn at random for the must-link list and, independently, another
    ``link_array_size`` for the cannot-link list. Each anchor is paired with
    the lowest-index sample that has the same target (must-link, excluding
    the anchor itself) or a different target (cannot-link). Anchors without
    any valid partner contribute nothing, so a list may end up shorter than
    ``link_array_size``.

    The result ``{'must_link': [...], 'cannot_link': [...]}`` is written with
    ``np.save(name, ...)``. Because the payload is a dict, it is stored as a
    pickled object array; read it back with
    ``np.load(path, allow_pickle=True).item()``.

    Args:
        link_array_size: Number of anchor samples drawn per link type.
        data_sets: Optional iterable of ``(name, data_set)`` pairs, where
            ``data_set`` exposes ``data`` and ``target`` attributes. Defaults
            to the module-level ``datasets`` list. Targets are assumed to be
            a 1-D label array -- TODO confirm for any new dataset.

    Returns:
        A dict mapping each ``name`` to the links dict saved for it.
    """
    if data_sets is None:
        data_sets = datasets

    generated = {}
    for name, data_set in data_sets:
        features = data_set.data
        labels = np.asarray(data_set.target)

        # Must-links are drawn first, then cannot-links, matching the order
        # in which random numbers were consumed before this refactor.
        must_links = _collect_links(
            features, labels, link_array_size, same_label=True)
        cannot_links = _collect_links(
            features, labels, link_array_size, same_label=False)

        links = {'must_link': must_links, 'cannot_link': cannot_links}
        np.save(name, links)
        generated[name] = links
    return generated