-
Notifications
You must be signed in to change notification settings - Fork 1
/
combine_and_sample.py
executable file
·80 lines (59 loc) · 1.78 KB
/
combine_and_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys
import numpy as np
import pdb
# Perturbation-sampling settings (values carried over from the original script).
SAMPLE_SIZE = 4999  # perturbed rows per seed; each output file has SAMPLE_SIZE + 1 rows
FLIP_PROB = 0.05    # independent probability of flipping each feature
NUM_SEEDS = 4       # how many top-ranked rows get a file (bad0.dat .. bad3.dat)


def parse_and_sort(feature_lines, score_lines):
    """Parse paired feature/score lines and order the rows by log-score.

    Args:
        feature_lines: iterable of strings, each holding whitespace-separated
            numeric features for one example.
        score_lines: iterable of strings, each holding one numeric score.
            Lines are paired positionally with ``feature_lines``; surplus
            lines in the longer input are ignored (``zip`` semantics).

    Returns:
        ``(X, Y)`` where ``X`` is the float feature array with rows ordered by
        ascending ``log(score)`` and ``Y`` is the matching array of
        ``-log(score)`` values.

    Raises:
        ValueError: if a token or a score cannot be converted to ``float``.
    """
    features, log_scores = [], []
    for feature_line, score_line in zip(feature_lines, score_lines):
        # split() with no argument tolerates tabs, repeated spaces and the
        # trailing newline, which split(' ') did not.
        features.append([float(token) for token in feature_line.split()])
        log_scores.append(np.log(float(score_line)))
    features = np.asarray(features)
    log_scores = np.asarray(log_scores)
    # argsort already yields integer indices, so no cast is needed
    # (the old np.int alias no longer exists in NumPy >= 1.24).
    order = np.argsort(log_scores)
    return features[order], -log_scores[order]


def sample_around(seed_row, sample_size=SAMPLE_SIZE, flip_prob=FLIP_PROB):
    """Return ``seed_row`` followed by randomly bit-flipped copies of it.

    Each feature of each copy is flipped independently with probability
    ``flip_prob``. Features are interpreted as booleans (any non-zero value
    counts as 1), so the perturbed rows only contain 0 and 1.

    Args:
        seed_row: 1-D array-like of features to perturb.
        sample_size: number of perturbed copies to draw.
        flip_prob: per-feature flip probability in ``[0, 1]``.

    Returns:
        Array of shape ``(sample_size + 1, len(seed_row))`` whose first row is
        ``seed_row`` unchanged and whose remaining rows are the perturbed
        copies.
    """
    seed_row = np.asarray(seed_row)
    # Drawn from NumPy's global RNG, so np.random.seed() controls the result.
    flips = np.random.binomial(1, flip_prob, (sample_size, len(seed_row)))
    # Broadcasting applies the same seed row against every row of flips.
    perturbed = np.logical_xor(seed_row, flips).astype(int)
    return np.vstack((seed_row, perturbed))


def main(argv):
    """Read the feature and score files and write one sample file per seed.

    ``argv[1]`` is the feature file and ``argv[2]`` the score file. The
    ``NUM_SEEDS`` rows with the lowest scores are each written, together with
    their perturbed copies, to ``bad<rank>.dat`` in the current directory.
    NOTE(review): the original comment calls these the "most anomalous"
    examples, i.e. low score presumably means anomalous -- confirm.
    """
    if len(argv) < 3:
        sys.exit("usage: %s FEATURES_FILE SCORES_FILE" % argv[0])

    with open(argv[1]) as features_file:
        feature_lines = features_file.readlines()
    with open(argv[2]) as scores_file:
        score_lines = scores_file.readlines()

    X, _ = parse_and_sort(feature_lines, score_lines)

    # Seeds are processed in rank order so the sequence of random draws
    # matches the original unrolled script.
    for rank in range(NUM_SEEDS):
        samples = sample_around(X[rank])
        np.savetxt("bad%d.dat" % rank, samples, fmt="%d", delimiter=" ")


if __name__ == "__main__":
    main(sys.argv)