-
Notifications
You must be signed in to change notification settings - Fork 6
/
utils.py
265 lines (253 loc) · 11.6 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
## utils.py
##
## Several utility functions
##
## Copyright (C) 2018, Huan Zhang <huan@huan-zhang.com> and contributors
##
## This program is licenced under the BSD 2-Clause License,
## contained in the LICENCE file in this directory.
## See CREDITS for a list of contributors.
##
import numpy as np
import random
import os
from PIL import Image
def linf_dist(x, y):
return np.linalg.norm(x.flatten() - y.flatten(), ord=np.inf)
def l2_dist(x, y):
return np.linalg.norm(x.flatten() - y.flatten(), ord=2)
def l1_dist(x, y):
return np.linalg.norm(x.flatten() - y.flatten(), ord=1)
def l0_dist(x, y):
return np.linalg.norm(x.flatten() - y.flatten(), ord=0)
def show(img, name = "output.png"):
"""
Show MNSIT digits in the console.
"""
np.save('img', img)
fig = np.around((img + 0.5)*255)
fig = fig.astype(np.uint8).squeeze()
pic = Image.fromarray(fig)
# pic.resize((512,512), resample=PIL.Image.BICUBIC)
pic.save(name)
remap = " .*#"+"#"*100
img = (img.flatten()+.5)*3
return
if len(img) != 784: return
print("START")
for i in range(28):
print("".join([remap[int(round(x))] for x in img[i*28:i*28+28]]))
def binary_search(cond, current, upper = np.inf, lower = 0.0, tol = 0.00001, max_steps = 100, upscale_mult = 10.0, downscale_mult = 10.0, upper_limit = 100000.0):
upper_not_found = True
lower_not_found = True
step = 0
while upper-lower > tol:
success, val = cond(current)
if val is not None:
print("[L2][binary search] step = {}, current = {:.6f}, success = {}, val = {:.2f}".format(step,current,success,val))
else:
print("[L2][binary search] step = {}, current = {:.6f}, success = {}".format(step,current,success))
if success: # success at current value
if upper_not_found: # always success, we have not found the true upper bound
# but this is a valid lower bound
lower = current
# increase search range, try to find an upper bound
current *= upscale_mult
else:
lower = current
current = 0.5 * (lower + upper)
# when initial is a success we always have a valid lower bound
lower_not_found = False
else:
if lower_not_found: # so far always < 0, haven't found eps_LB
upper = current
current /= downscale_mult
else:
upper = current
current = 0.5 * (lower + upper)
# when initial is a failure we always have a valid upper bound
upper_not_found = False
step += 1
if step >= max_steps:
break
if current > upper_limit:
break
return lower
def generate_data(data, samples, targeted=True, random_and_least_likely = False, skip_wrong_label = True,
force_label = None, start=0, ids = None, target_classes = None, target_type = 0b1111,
predictor = None, total_images = -1, imagenet=False, remove_background_class=False,
save_inputs=False, model_name=None, save_inputs_dir=None):
"""
Generate the input data to the attack algorithm.
data: the images to attack
samples: number of samples to use
targeted: if true, construct targeted attacks, otherwise untargeted attacks
start: offset into data to use
ids: true IDs of images in the dataset, if given, will use these images
target_classes: a list of list of labels for each ids
inception: if targeted and inception, randomly sample 100 targets intead of 1000
"""
inputs = []
targets = []
true_labels = []
true_ids = []
information = []
target_candidate_pool = np.eye(data.test_labels.shape[1])
target_candidate_pool_remove_background_class = np.eye(data.test_labels.shape[1] - 1)
print('generating labels...')
print('selecting {} images from {} candidates'.format(total_images if total_images else samples, samples))
if ids is None:
ids = range(samples)
else:
ids = ids[start:start+samples]
if target_classes:
target_classes = target_classes[start:start+samples]
start = 0
if total_images == -1:
total_images = samples
total = 0
n_correct = 0
n_image_added = 0
for i in ids:
total += 1
image_added = False
if targeted:
predicted_label = -1 # unknown
if random_and_least_likely:
# if there is no user specified target classes
if target_classes is None:
original_predict = np.squeeze(predictor(np.array([data.test_data[start+i]])))
num_classes = len(original_predict)
predicted_label = np.argmax(original_predict)
least_likely_label = np.argmin(original_predict)
runnerup_label = np.argsort(original_predict)[-2]
start_class = 1 if (imagenet and not remove_background_class) else 0
random_class = predicted_label
new_seq = [least_likely_label, runnerup_label, predicted_label]
while random_class in new_seq:
random_class = random.randint(start_class, start_class + num_classes - 1)
new_seq[2] = random_class
true_label = np.argmax(data.test_labels[start+i])
seq = []
if true_label != predicted_label and skip_wrong_label:
seq = []
elif force_label is not None:
# true_label == predicted_label
if true_label != force_label:
seq = []
else:
seq.append(true_label)
information.append(str(force_label))
image_added = True
else:
image_added = True
if target_type & 0b10000:
for c in range(num_classes):
if c != predicted_label:
seq.append(c)
information.append('class'+str(c))
else:
if target_type & 0b0100:
# least
seq.append(new_seq[0])
information.append('least')
if target_type & 0b0001:
# top-2
seq.append(new_seq[1])
information.append('runnerup')
if target_type & 0b0010:
# random
seq.append(new_seq[2])
information.append('random')
else:
image_added = True
# use user specified target classes
seq = target_classes[total - 1]
information.extend(len(seq) * ['user'])
else:
image_added = True
if imagenet:
if remove_background_class:
seq = random.sample(range(0,1000), 10)
else:
seq = random.sample(range(1,1001), 10)
information.extend(data.test_labels.shape[1] * ['random'])
else:
seq = range(data.test_labels.shape[1])
information.extend(data.test_labels.shape[1] * ['seq'])
is_correct = np.argmax(data.test_labels[start+i]) == predicted_label
if is_correct:
n_correct += 1
print("[DATAGEN][L1] no = {}, true_id = {}, true_label = {}, predicted = {}, force_label = {}, correct = {}, seq = {}, info = {}".format(total, start + i,
np.argmax(data.test_labels[start+i]), predicted_label, force_label, is_correct, seq, [] if len(seq) == 0 else information[-len(seq):]))
for j in seq:
# skip the original image label
if not force_label and (j == np.argmax(data.test_labels[start+i])):
continue
inputs.append(data.test_data[start+i])
if remove_background_class:
targets.append(target_candidate_pool_remove_background_class[j])
else:
targets.append(target_candidate_pool[j])
true_labels.append(data.test_labels[start+i])
if remove_background_class:
true_labels[-1] = true_labels[-1][1:]
true_ids.append(start+i)
else:
true_label = np.argmax(data.test_labels[start+i])
original_predict = np.squeeze(predictor(np.array([data.test_data[start+i]])))
num_classes = len(original_predict)
predicted_label = np.argmax(original_predict)
is_correct = np.argmax(data.test_labels[start+i]) == predicted_label
if is_correct:
n_correct += 1
if true_label != predicted_label and skip_wrong_label:
pass
else:
image_added = True
inputs.append(data.test_data[start+i])
if remove_background_class:
# shift target class by 1
print(np.argmax(data.test_labels[start+i]))
print(np.argmax(data.test_labels[start+i][1:1001]))
targets.append(data.test_labels[start+i][1:1001])
else:
targets.append(data.test_labels[start+i])
true_labels.append(data.test_labels[start+i])
if remove_background_class:
true_labels[-1] = true_labels[-1][1:]
true_ids.append(start+i)
information.extend(['original'])
print("[DATAGEN][L1] no = {}, true_id = {}, true_label = {}, predicted = {}, correct = {}, added = {}".format(total, start + i, true_label, predicted_label, is_correct, image_added))
if image_added:
n_image_added += 1
if n_image_added >= total_images:
break
inputs = np.array(inputs)
targets = np.array(targets)
true_labels = np.array(true_labels)
true_ids = np.array(true_ids)
print('labels generated')
print('{} images generated in total.'.format(len(inputs)))
print('top-1 accuracy:', n_correct / float(total))
if save_inputs:
if not os.path.exists(save_inputs_dir):
os.makedirs(save_inputs_dir)
save_model_dir = os.path.join(save_inputs_dir,model_name)
if not os.path.exists(save_model_dir):
os.makedirs(save_model_dir)
info_set = list(set(information))
for info_type in info_set:
save_type_dir = os.path.join(save_model_dir,info_type)
if not os.path.exists(save_type_dir):
os.makedirs(save_type_dir)
counter = 0
for i in range(len(information)):
if information[i] == info_type:
df = inputs[i,:,:,0]
df = df.flatten()
np.savetxt(os.path.join(save_type_dir,'point{}.txt'.format(counter)),df,newline='\t')
counter += 1
target_labels = np.array([np.argmax(targets[i]) for i in range(len(information)) if information[i]==info_type])
np.savetxt(os.path.join(save_model_dir,model_name+'_target_'+info_type+'.txt'),target_labels,fmt='%d',delimiter='\n')
return inputs, targets, true_labels, true_ids, information