import numpy as np
import os
import scipy.io
import sys
import tensorflow as tf
import utils_cifar
try:
    import cPickle
except ImportError:
    import pickle as cPickle
'''
Dictionary-format data. CIFAR-100 batch file content:
{
    "data": [(R,G,B, R,G,B, ...), ...],          # 50000 images, each 32 * 32 * 3
    "coarse_labels": [0, ..., 19],               # 0~19 superclass labels
    "filenames": ["volcano_s_000012.png", ...],  # file names
    "batch_label": "",
    "fine_labels": [0, 1, ..., 99]               # 0~99 fine-class labels
}
'''
def unpickle(Cifar_file):
    # Load a CIFAR batch file; latin1 encoding handles Python 2 pickles under Python 3.
    with open(Cifar_file, mode='rb') as file:
        u = cPickle._Unpickler(file)
        u.encoding = 'latin1'
        data_dict = u.load()
    return data_dict
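# Example (sketch): inspecting a CIFAR-100 batch with unpickle(). The paths
# below are assumptions -- point them at your local copies of the python-version
# CIFAR-100 'train' and 'test' files.
#
#     d = unpickle('cifar-100-python/train')
#     print(sorted(d.keys()))      # ['batch_label', 'coarse_labels', 'data', 'filenames', 'fine_labels']
#     print(d['data'].shape)       # (50000, 3072): each row is R(1024) + G(1024) + B(1024)
#     print(d['fine_labels'][:5])  # first five fine-class labels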
def load_data(Cifar_train_file, Cifar_test_file, nb_per_cl_val=0):
    # Collect training and test data/labels into one array each.
    xs = []
    ys = []
    d = unpickle(Cifar_train_file)
    xs.append(d['data'])         # training images
    ys.append(d['fine_labels'])  # training labels
    d = unpickle(Cifar_test_file)
    xs.append(d['data'])
    ys.append(d['fine_labels'])
    # Normalize to [0, 1] and reshape each flat row (1024 R + 1024 G + 1024 B)
    # into a (32, 32, 3) image, giving x the shape (60000, 32, 32, 3).
    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3))
    # Subtract the per-pixel mean (computed here over the first 5000 training images only).
    pixel_mean = np.mean(x[0:5000], axis=0)
    x -= pixel_mean
    # Build the training set; optionally hold out nb_per_cl_val images per class for validation.
    train_sample_cl = 500 - nb_per_cl_val
    X_train = np.zeros((train_sample_cl * 100, 32, 32, 3))
    Y_train = np.zeros(train_sample_cl * 100)
    X_valid = np.zeros((nb_per_cl_val * 100, 32, 32, 3))
    Y_valid = np.zeros(nb_per_cl_val * 100)
    for i in range(100):  # assign all samples for every class in one pass
        index_y = np.where(y[0:50000] == i)[0]
        np.random.shuffle(index_y)
        X_train[i * train_sample_cl:(i + 1) * train_sample_cl] = x[index_y[0:train_sample_cl], :, :, :]
        Y_train[i * train_sample_cl:(i + 1) * train_sample_cl] = y[index_y[0:train_sample_cl]]
        # X_valid[i * nb_per_cl_val:(i + 1) * nb_per_cl_val] = x[index_y[train_sample_cl:500], :, :, :]
        # Y_valid[i * nb_per_cl_val:(i + 1) * nb_per_cl_val] = y[index_y[train_sample_cl:500]]
    X_test = x[50000:, :, :, :]
    Y_test = y[50000:].astype(int)
    Y_train = Y_train.astype(int)
    # These arrays are converted to TensorFlow tensors (with batching) downstream.
    return X_train, Y_train, X_test, Y_test
    # Alternative: return a dict instead of a tuple.
    # return dict(
    #     X_train=X_train,
    #     Y_train=Y_train,
    #     X_test=X_test,
    #     Y_test=Y_test,
    # )
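# Example (sketch): loading CIFAR-100 with load_data(). The file paths are
# assumptions for illustration.
#
#     X_train, Y_train, X_test, Y_test = load_data(
#         'cifar-100-python/train', 'cifar-100-python/test', nb_per_cl_val=0)
#     # X_train: (50000, 32, 32, 3) float images, mean-subtracted
#     # X_test:  (10000, 32, 32, 3)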
'''
Function: GetData / Prepare_train_data_batch
Author: magic
Date: 2019.4.19
Purpose: prepare the data used to train the model (including the exemplar/proto set).
Args: dataset, corresponding labels, batch_size.
Returns: the prepared dataset and its labels.
'''
# itera: index of the current incremental-learning step; nb_cl: number of classes added per step.
def GetData(train_data, train_data_label, xu_protoset, itera, order, nb_cl):
    # Classes introduced at this incremental step.
    traindata_index = order[itera * nb_cl:(itera + 1) * nb_cl]
    images = []
    label = []
    file_xu = []
    for i in traindata_index:
        index = np.where(train_data_label[0:50000] == i)[0]
        images.append(train_data[index])
        label.append(train_data_label[index])
        file_xu.append(index)
    # Append the exemplar (proto) set kept from earlier steps, e.g.
    # xu_protoset[1] = [1, 2, 3, 4, 5]
    # xu_protoset[2] = [2021, 2022, 2023, 2024, 2025]
    for i in range(100):
        if len(xu_protoset[i]) != 0:
            images.append(train_data[xu_protoset[i]])       # exemplar images
            label.append(train_data_label[xu_protoset[i]])  # exemplar labels
            file_xu.append(xu_protoset[i])
    file_xu = np.concatenate(file_xu)
    images = np.concatenate(images)
    label = np.concatenate(label)
    return images, label, file_xu
def Prepare_train_data_batch(train_data, train_data_label, xu_protoset, itera, order, nb_cl, batch_size=128):
    images, label, file_xu = GetData(train_data, train_data_label, xu_protoset, itera, order, nb_cl)
    images = tf.cast(images, tf.float32)
    label = tf.cast(label, tf.int32)
    file_xu = tf.cast(file_xu, tf.int32)
    # Slice the tensors into a (shuffled) example queue, then batch.
    input_queue = tf.train.slice_input_producer([images, label, file_xu], shuffle=True)
    image_batch, label_batch, file_xu_batch = tf.train.batch(input_queue, batch_size=batch_size, num_threads=8, capacity=128)
    return image_batch, label_batch, file_xu_batch
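# Example (sketch): consuming the queue-based batches in a TF1 session. The
# slice_input_producer/batch pipeline only yields data once queue runners are
# started; xu_protoset below is an assumed list of 100 per-class index lists.
#
#     image_batch, label_batch, file_xu_batch = Prepare_train_data_batch(
#         X_train, Y_train, xu_protoset, itera=0, order=np.arange(100), nb_cl=10)
#     with tf.Session() as sess:
#         coord = tf.train.Coordinator()
#         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#         imgs, lbls, idx = sess.run([image_batch, label_batch, file_xu_batch])
#         coord.request_stop()
#         coord.join(threads)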
def GetData_all(train_data, train_data_label, xu_protoset, itera, order, nb_cl):
    # All classes seen up to and including this incremental step.
    traindata_index = order[0:(itera + 1) * nb_cl]
    images = []
    label = []
    file_xu = []
    for i in traindata_index:
        index = np.where(train_data_label[0:50000] == i)[0]
        images.append(train_data[index])
        label.append(train_data_label[index])
        file_xu.append(index)
    # Append the exemplar (proto) set.
    for i in range(100):
        if len(xu_protoset[i]) != 0:
            images.append(train_data[xu_protoset[i]])       # exemplar images
            label.append(train_data_label[xu_protoset[i]])  # exemplar labels
            file_xu.append(xu_protoset[i])
    file_xu = np.concatenate(file_xu)
    images = np.concatenate(images)
    label = np.concatenate(label)
    return images, label, file_xu
def Prepare_train_data_batch_all(train_data, train_data_label, xu_protoset, itera, order, nb_cl, batch_size=128):
    images, label, file_protoset = GetData_all(train_data, train_data_label, xu_protoset, itera, order, nb_cl)
    images = tf.cast(images, tf.float32)
    label = tf.cast(label, tf.int32)
    file_protoset = tf.cast(file_protoset, tf.int32)
    # Slice the tensors into a (shuffled) example queue, then batch.
    input_queue = tf.train.slice_input_producer([images, label, file_protoset], shuffle=True)
    image_batch, label_batch, file_protoset_batch = tf.train.batch(input_queue, batch_size=batch_size, num_threads=8, capacity=128)
    return image_batch, label_batch, file_protoset_batch
# Fetch test data for all classes seen so far.
def GetTestData(test_data, test_data_label, itera, order, nb_cl):
    traindata_index = order[0:(itera + 1) * nb_cl]
    images = []
    label = []
    file_xu = []
    for i in traindata_index:
        index = np.where(test_data_label[0:10000] == i)[0]
        images.append(test_data[index])
        label.append(test_data_label[index])
        file_xu.append(index)  # row indices of the images; these serve as file names
    file_xu = np.concatenate(file_xu)
    images = np.concatenate(images)
    label = np.concatenate(label)
    return images, label, file_xu
def Prepare_test_data_batch(test_data, test_data_label, itera, order, nb_cl, batch_size=128):
    images, label, file_protoset = GetTestData(test_data, test_data_label, itera, order, nb_cl)
    images = tf.cast(images, tf.float32)
    label = tf.cast(label, tf.int32)
    file_protoset = tf.cast(file_protoset, tf.int32)
    # Slice the tensors into a (shuffled) example queue, then batch.
    input_queue = tf.train.slice_input_producer([images, label, file_protoset], shuffle=True)
    image_batch, label_batch, file_protoset_batch = tf.train.batch(input_queue, batch_size=batch_size, num_threads=8, capacity=128)
    return image_batch, label_batch, file_protoset_batch
'''
Function: reading_data_and_preparing_network
Author: magic
Date: 2019.6.6
Purpose: build the ops that produce model output features for data samples
         (not executed yet; the features are obtained after sess.run).
Args: data samples, corresponding labels, batch_size.
Returns: the network initializers and the feature ops for the samples.
'''
def reading_data_and_preparing_network(option, train_data, train_data_label, xu_protoset, itera, batch_size, order, nb_cl, save_path):
    if option == 'train':
        image_batch, label_batch, file_xu_batch = Prepare_train_data_batch(train_data, train_data_label, xu_protoset, itera, order, nb_cl, batch_size=batch_size)
    elif option == 'test':
        image_batch, label_batch, file_xu_batch = Prepare_test_data_batch(train_data, train_data_label, itera, order, nb_cl, batch_size=batch_size)
    else:
        raise ValueError("option must be 'train' or 'test'")
    label_batch_one_hot = tf.one_hot(label_batch, 100)
    ### Network and loss function
    with tf.variable_scope('ResNet34'):
        with tf.device('/gpu:0'):
            scores = utils_cifar.ResNet34(image_batch, phase='test')
            graph = tf.get_default_graph()
            op_feature_map = graph.get_operation_by_name('ResNet34/pool_last/avg').outputs[0]
    loss_class = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=label_batch_one_hot, logits=scores))
    ### Initialization
    # NOTE: mind the model save path.
    params = dict(cPickle.load(open(save_path + 'model-iteration' + str(nb_cl) + '-%i.pickle' % itera, 'rb')))
    inits = utils_cifar.get_weight_initializer(params)
    return inits, scores, label_batch, loss_class, file_xu_batch, op_feature_map
'''
Function: load_class_in_feature_space
Author: magic
Date: 2019.6.6
Purpose: obtain the model's output features for the data samples.
Args: data samples, corresponding labels, batch_size.
Returns: the samples' features, their labels, and the processed sample indices.
'''
def load_class_in_feature_space(nb_cl, batch_size, scores, label_batch, loss_class, file_xu_batch, op_feature_map, sess, file_num):
    label_dico = []
    Dtot = []
    processed_file = []
    for i in range(int(np.ceil(file_num / batch_size) + 1)):  # number of batches to run
        # scores, labels, classification loss, sample indices, and feature maps for one batch
        sc, l, loss, file_tmp, feat_map_tmp = sess.run(
            [scores, label_batch, loss_class, file_xu_batch, op_feature_map])
        processed_file.extend(file_tmp)
        label_dico.extend(l)
        mapped_prototypes = feat_map_tmp[:, 0, 0, :]
        # L2-normalize each feature column (np.linalg.norm computes the 2-norm).
        Dtot.append((mapped_prototypes.T) / np.linalg.norm(mapped_prototypes.T, axis=0))
    Dtot = np.concatenate(Dtot, axis=1)
    label_dico = np.array(label_dico)
    processed_file = np.array(processed_file)
    return Dtot, label_dico, processed_file
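# Example (sketch): a typical use of the feature matrix Dtot -- computing an
# L2-normalized class-mean prototype per class, iCaRL-style. Dtot has shape
# (feature_dim, n_samples) with unit-norm columns; the variable names and the
# assumption that get_weight_initializer() returns runnable init ops are for
# illustration only.
#
#     inits, scores, label_batch, loss_class, file_xu_batch, op_feature_map = \
#         reading_data_and_preparing_network('train', X_train, Y_train, xu_protoset,
#                                            itera, batch_size, order, nb_cl, save_path)
#     with tf.Session() as sess:
#         sess.run(inits)  # load the saved weights
#         coord = tf.train.Coordinator()
#         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#         Dtot, label_dico, processed_file = load_class_in_feature_space(
#             nb_cl, batch_size, scores, label_batch, loss_class,
#             file_xu_batch, op_feature_map, sess, file_num=len(X_train))
#         coord.request_stop()
#         coord.join(threads)
#     for cl in np.unique(label_dico):
#         mean_cl = np.mean(Dtot[:, label_dico == cl], axis=1)
#         mean_cl /= np.linalg.norm(mean_cl)  # unit-norm class prototype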