This repository has been archived by the owner on Jan 18, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathgenerate_tfrecord.py
117 lines (87 loc) · 3.7 KB
/
generate_tfrecord.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
A demonstration file showing how to generate a TensorFlow TFRecord file.
The sample used here is from the IBUG data set and consists of 2 parts:
1. a sample image.
2. 68 facial landmarks.
Usage:
python3 generate_tfrecord.py \
--csv=data/ibug.csv \
--image_dir=path_to_image \
--mark_dir=path_to_marks \
--output_file=data/ibug.record
"""
import json
import os
import sys
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
# Turn on eager execution as soon as the module is loaded.
# NOTE(review): presumably required so that create_tf_example can read the
# concrete shape of the decoded image -- confirm before removing.
tf.enable_eager_execution()
# FLAGS, used as interface of user inputs.
flags = tf.app.flags
# Sample list; main() reads it with pandas and uses its `file_basename` column.
flags.DEFINE_string('csv', '', 'The csv file contains all file to be encoded.')
# Directory joined with `<sample name>.jpg` in get_ibug_files().
flags.DEFINE_string('image_dir', '', 'The path of images directory')
# Directory joined with `<sample name>.json` in get_ibug_files().
flags.DEFINE_string('mark_dir', '', 'The path of mark files directory')
# Destination handed to the TFRecord writer in main().
flags.DEFINE_string('output_file', 'record.record',
'Where the record file should be placed.')
# Flag values are read through this object (FLAGS.csv, FLAGS.image_dir, ...).
FLAGS = flags.FLAGS
def _int64_feature(value):
    """Wrap one bool / enum / int / uint value in an int64_list Feature."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _float_feature(value):
    """Wrap one float / double value in a float_list Feature."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)
def _float_feature_list(value):
    """Wrap a sequence of floats / doubles in a float_list Feature.

    Unlike _float_feature, `value` is passed through as-is instead of being
    wrapped in a one-element list.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)
def _bytes_feature(value):
    """Wrap one string / byte value in a bytes_list Feature."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def get_ibug_files(sample_name):
    """Build the file paths that belong to one sample.

    Args:
        sample_name: base name of the sample, without extension.

    Returns:
        A tuple (image_file, mark_file): `<sample_name>.jpg` inside
        FLAGS.image_dir and `<sample_name>.json` inside FLAGS.mark_dir.
    """
    jpg_name = sample_name + '.jpg'
    json_name = sample_name + '.json'
    return (os.path.join(FLAGS.image_dir, jpg_name),
            os.path.join(FLAGS.mark_dir, json_name))
def get_ibug_sample(image_file, mark_file):
    """Create an IBUG sample from raw disk files.

    Args:
        image_file: path of the encoded image file.
        mark_file: path of the json file holding the marks.

    Returns:
        A dict with the keys:
            "filename": base name of the image file without its extension.
            "image_format": extension of the image file without the dot
                (empty string when the file has no extension).
            "image": raw encoded bytes read from `image_file`.
            "marks": whatever `mark_file` contains, parsed from json.
    """
    # Keep the filename in the record for debug reasons. os.path is used
    # instead of splitting on '/' and '.' by hand so that the platform's path
    # separator, names with several dots ("a.b.jpg") and names without any
    # extension are all handled.
    filename, dotted_ext = os.path.splitext(os.path.basename(image_file))
    ext_name = dotted_ext.lstrip('.')

    # Read encoded image file.
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()

    # Read the marks from a json file.
    with open(mark_file) as fid:
        marks = json.load(fid)

    return {"filename": filename,
            "image_format": ext_name,
            "image": encoded_jpg,
            "marks": marks}
def create_tf_example(ibug_sample):
    """Turn one data sample into a tf.train.Example.

    Args:
        ibug_sample: dict with the keys "filename", "image_format", "image"
            and "marks", as built by get_ibug_sample.

    Returns:
        A tf.train.Example holding the image size, file name, encoded image
        bytes, image format and the marks.
    """
    # The image is decoded only to learn its shape; the bytes stored in the
    # example are the still-encoded ones.
    shape = tf.image.decode_jpeg(ibug_sample["image"]).shape

    # Fill the feature map entry by entry.
    feature = {}
    feature['image/height'] = _int64_feature(shape[0])
    feature['image/width'] = _int64_feature(shape[1])
    feature['image/depth'] = _int64_feature(shape[2])
    feature['image/filename'] = _bytes_feature(
        ibug_sample['filename'].encode('utf8'))
    feature['image/encoded'] = _bytes_feature(ibug_sample['image'])
    feature['image/format'] = _bytes_feature(
        ibug_sample['image_format'].encode('utf8'))
    feature['label/marks'] = _float_feature_list(ibug_sample['marks'])

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features)
def main(_):
    """Entrance: encode every sample listed in the csv into one record file.

    Reads the sample list from FLAGS.csv (its `file_basename` column names the
    samples), loads the image and mark file of each sample and writes the
    serialized examples to FLAGS.output_file.

    Args:
        _: unused positional argument supplied by tf.app.run.
    """
    # Read the sample list first, so a bad csv path fails before an empty
    # record file is created.
    samples = pd.read_csv(FLAGS.csv)

    # The context manager closes the writer even when a sample fails, so the
    # records written so far are flushed and the file handle is not leaked.
    with tf.python_io.TFRecordWriter(FLAGS.output_file) as tf_writer:
        # iterrows() is a generator without a length; hand tqdm the total so
        # it can show real progress instead of a bare counter.
        for _, row in tqdm(samples.iterrows(), total=len(samples)):
            sample_name = row['file_basename']
            img_file, mark_file = get_ibug_files(sample_name)
            ibug_sample = get_ibug_sample(img_file, mark_file)
            tf_example = create_tf_example(ibug_sample)
            tf_writer.write(tf_example.SerializeToString())
if __name__ == '__main__':
    # Script entry point: let TensorFlow's app runner invoke main().
    tf.app.run(main=main)