-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
122 lines (105 loc) · 5.28 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import tensorflow as tf
import numpy as np
import cv2
import os
import sys
import tqdm
import lmdb
import six
from PIL import Image
from data_provider.data_utils import get_vocabulary
from utils.transcription_utils import idx2label, calc_metrics
from model import Model
from utils.visualization import heatmap_visualize
from config import get_args
def get_images(images_dir):
files = []
exts = ['jpg', 'png', 'jpeg', 'JPG']
for parent, dirnames, filenames in os.walk(images_dir):
for filename in filenames:
for ext in exts:
if filename.endswith(ext):
files.append(os.path.join(parent, filename))
break
print('Find {} images'.format(len(files)))
return files
def resize_pad_img(image, height, width, keep_ratio=True):
H, W, C = image.shape
# Rotate the vertical images
if H > 4 * W:
image = np.rot90(image)
H, W = W, H
if keep_ratio:
new_width = int((1.0 * height / H) * W)
new_width = new_width if new_width < width else width
new_height = height
img_resize = np.zeros((height, width, C), dtype=np.uint8)
image = cv2.resize(image, (new_width, new_height))
img_resize[:, :new_width, :] = image
else:
img_resize = cv2.resize(image, (width, height))
new_width = width
img_raw = img_resize
img_resize = np.expand_dims(cv2.cvtColor(img_resize, cv2.COLOR_RGB2GRAY), axis=-1)
return img_resize, new_width, img_raw
def data_preprocess(image, word, char2id, args):
img_resize, new_width, img_raw = resize_pad_img(image, args.height, args.width, args.keep_ratio)
label = np.full((args.max_len), char2id['EOS'], dtype=np.int)
label_list = []
for char in word:
if char in char2id:
label_list.append(char2id[char])
else:
continue
if len(label_list) > (args.max_len - 1):
label_list = label_list[:(args.max_len - 1)]
label[:len(label_list)] = np.array(label_list)
return img_resize, label, new_width, img_raw
def main_test_lmdb(args):
voc, char2id, id2char = get_vocabulary(voc_type=args.voc_type)
input_images = tf.placeholder(dtype=tf.float32, shape=[1, args.height, args.width, 1], name="input_images")
input_labels = tf.placeholder(dtype=tf.int32, shape=[1, args.max_len], name="input_labels")
model = Model(num_classes=len(voc), num_block=args.num_block, embed_dim=args.embed_dim, att_dim=args.att_dim, num_head=args.num_head, num_decoder=args.num_decoder, hidden_units=args.hidden_units, seq_len=args.max_len, is_training=False)
enc_logits, at_logits, enc_pred, at_pred, enc_attention_weights, at_attention_weights, _, _ = model(input_images, input_labels, reuse=False)
global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False, dtype=tf.int32)
variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
saver = tf.train.Saver(variable_averages.variables_to_restore())
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
ckpt_state = tf.train.get_checkpoint_state(args.checkpoints)
model_path = os.path.join(args.checkpoints, os.path.basename(ckpt_state.model_checkpoint_path))
print('Restore from {}'.format(model_path))
saver.restore(sess, model_path)
print("Checkpoints step: {}".format(global_step.eval(session=sess)))
env = lmdb.open(args.test_data_dir, readonly=True)
txn = env.begin()
num_samples = int(txn.get(b"num-samples").decode())
predicts = []
labels = []
for i in tqdm.tqdm(range(1, num_samples+1)):
image_key = b'image-%09d' % i
label_key = b'label-%09d' % i
imgbuf = txn.get(image_key)
buf = six.BytesIO()
buf.write(imgbuf)
buf.seek(0)
img_pil = Image.open(buf).convert('RGB')
img = np.array(img_pil)
label = txn.get(label_key).decode()
labels.append(label)
img, la, width, img_ = data_preprocess(img, label, char2id, args)
pred_value, attention_weights_value = sess.run([enc_pred, enc_attention_weights], feed_dict={input_images: [img], input_labels: [la]})
pred_value_str = idx2label(pred_value, id2char, char2id)[0]
predicts.append(pred_value_str)
if args.vis_dir != None and args.vis_dir != "":
os.makedirs(args.vis_dir, exist_ok=True)
os.makedirs(os.path.join(args.vis_dir, "errors"), exist_ok=True)
_ = heatmap_visualize(img_, attention_weights_value, pred_value_str, args.vis_dir, "{}.jpg".format(i))
if pred_value_str.lower() != label.lower():
_ = heatmap_visualize(img_, attention_weights_value, pred_value_str,
os.path.join(args.vis_dir, "errors"), "{}.jpg".format(i))
metrics_value = calc_metrics(predicts, labels, args.metrics_type)
print("Done, {}: {}".format(args.metrics_type, metrics_value))
if __name__ == "__main__":
args = get_args(sys.argv[1:])
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
main_test_lmdb(args)