import tensorflow as tf
import numpy as np
import cv2
import os
import sys
from data_provider.data_utils import get_vocabulary
from data_provider.json_loader import JSONLoader
from data_provider.text_loader import TextLoader
from utils.transcription_utils import idx2label, calc_metrics
from sar_model import SARModel
from utils.metrics import accuracy
from config import get_args


def get_images(images_dir):
    # Recursively collect all image files under images_dir.
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(images_dir):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    print('Found {} images'.format(len(files)))
    return files


def get_data(args):
    # With a ground-truth file, parse image paths and labels from it;
    # otherwise scan the test directory and use empty labels.
    if args.test_data_gt is not None and args.test_data_gt != '':
        if args.test_data_gt.split('.')[-1] == 'json':
            # data_loader = ArTLoader(args.test_data_dir)
            data_loader = JSONLoader(args.test_data_dir)
        elif args.test_data_gt.split('.')[-1] == 'txt':
            data_loader = TextLoader(args.test_data_dir)
        else:
            raise Exception("Unsupported file type")
        images_path, labels = data_loader.parse_gt(args.test_data_gt)
        return images_path, labels
    else:
        images_path = get_images(args.test_data_dir)
        labels = ['' for i in range(len(images_path))]
        return images_path, labels


def data_preprocess(image, word, char2id, args):
    H, W, C = image.shape
    # Rotate vertical images so that the text runs horizontally.
    if H > 4 * W:
        image = np.rot90(image)
        H, W = W, H

    # Resize to the target height while keeping the aspect ratio,
    # then pad on the right up to the fixed input width.
    new_width = int((1.0 * args.height / H) * W)
    new_width = new_width if new_width < args.width else args.width
    new_height = args.height
    img_resize = np.zeros((args.height, args.width, C), dtype=np.uint8)
    image = cv2.resize(image, (new_width, new_height))
    img_resize[:, :new_width, :] = image

    # Encode the label: known characters by index, unknown ones as UNK,
    # terminated with EOS and padded with PAD up to max_len.
    label = np.full((args.max_len,), char2id['PAD'], dtype=np.int32)
    label_list = []
    for char in word:
        if char in char2id:
            label_list.append(char2id[char])
        else:
            label_list.append(char2id['UNK'])
    if len(label_list) > (args.max_len - 1):
        label_list = label_list[:(args.max_len - 1)]
    label_list = label_list + [char2id['EOS']]
    label[:len(label_list)] = np.array(label_list)
    return img_resize, label, new_width
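
# A rough worked example of data_preprocess (hypothetical values: args.height=48,
# args.width=160, args.max_len=30; the real defaults live in config.get_args):
# a 32x80 BGR crop of the word "cat" is scaled to 48x120, right-padded with zeros
# to 48x160, and the returned label is [id('c'), id('a'), id('t'), EOS, PAD, ...]
# of length 30, together with the unpadded width 120.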


def main_test(args):
    voc, char2id, id2char = get_vocabulary(voc_type=args.voc_type)

    # Inference graph: batch size 1, fixed height, variable width.
    input_images = tf.placeholder(dtype=tf.float32, shape=[1, args.height, None, 3], name="input_images")
    input_images_width = tf.placeholder(dtype=tf.float32, shape=[1], name="input_images_width")
    input_labels = tf.placeholder(dtype=tf.int32, shape=[1, args.max_len], name="input_labels")
    sar_model = SARModel(num_classes=len(voc),
                         encoder_dim=args.encoder_sdim,
                         encoder_layer=args.encoder_layers,
                         decoder_dim=args.decoder_sdim,
                         decoder_layer=args.decoder_layers,
                         decoder_embed_dim=args.decoder_edim,
                         seq_len=args.max_len,
                         is_training=False)
    model_infer, attention_weights, pred = sar_model(input_images, input_labels, input_images_width, reuse=False)

    # Restore the exponential-moving-average shadow variables from the latest checkpoint.
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False, dtype=tf.int32)
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ckpt_state = tf.train.get_checkpoint_state(args.checkpoints)
        model_path = os.path.join(args.checkpoints, os.path.basename(ckpt_state.model_checkpoint_path))
        print('Restore from {}'.format(model_path))
        saver.restore(sess, model_path)

        images_path, labels = get_data(args)
        predicts = []
        for img_path, label in zip(images_path, labels):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None instead of raising when the file cannot be read.
                print("{} error: failed to read image".format(img_path))
                continue
            img, la, width = data_preprocess(img, label, char2id, args)
            pred_value, attention_weights_value = sess.run([pred, attention_weights],
                                                           feed_dict={input_images: [img],
                                                                      input_labels: [la],
                                                                      input_images_width: [width]})
            pred_value_str = idx2label(pred_value, id2char, char2id)[0]
            print("predict: {} label: {}".format(pred_value_str, label))
            predicts.append(pred_value_str)
            pred_value_str += '$'

            # Optionally dump one attention-map overlay per decoded character.
            if args.vis_dir is not None and args.vis_dir != "":
                os.makedirs(args.vis_dir, exist_ok=True)
                assert len(img.shape) == 3
                att_maps = attention_weights_value.reshape(
                    [-1, attention_weights_value.shape[2], attention_weights_value.shape[3], 1])  # T * H * W * 1
                for i, att_map in enumerate(att_maps):
                    if i >= len(pred_value_str):
                        break
                    att_map = cv2.resize(att_map, (img.shape[1], img.shape[0]))
                    _att_map = np.zeros(dtype=np.uint8, shape=[img.shape[0], img.shape[1], 3])
                    _att_map[:, :, -1] = (att_map * 255).astype(np.uint8)
                    show_attention = cv2.addWeighted(img, 0.5, _att_map, 2, 0)
                    cv2.imwrite(os.path.join(args.vis_dir,
                                             os.path.basename(img_path).split('.')[0] + "_" + str(i) + "_" + pred_value_str[i] + ".jpg"),
                                show_attention)

        acc_rate = accuracy(predicts, labels)
        print("Done, Accuracy: {}".format(acc_rate))


if __name__ == "__main__":
    args = get_args(sys.argv[1:])
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    main_test(args)
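
# Example invocation (a sketch: the flag names are assumed to mirror the args
# attributes consumed above; they are actually defined in config.get_args, so
# check that file for the exact spelling and defaults):
#   python test.py --test_data_dir ./test_images --test_data_gt ./gt.txt \
#       --checkpoints ./checkpoints --vis_dir ./vis --gpus 0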