-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_nmnist_testonly.py
147 lines (124 loc) · 5.51 KB
/
make_nmnist_testonly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
""" Make Nosaic MNIST in TFRecord format.
Make sure to generate both the training and test datasets by listing them in `phase` (see the USER DEFINED section below).
To load the TFR, see, e.g., `read_tfrecords_nosaic_mnist` in ./datasets/data_prossessing.py.
This script is the Step 1 in
1. Generate Nosaic MNIST (`python make_nmnist.py`).
2. Extract and save features in TFRecords format (save_featureTFR_nmnist.ipynb).
3. Plot SAT curve (plot_SAT_curve.ipynb)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, time
import PIL
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
############## USER DEFINED #####################
# Directory handed to tfds.load(); MNIST will be downloaded to this directory.
data_dir = "./data-directory/tensorflow_datasets"
# Splits to generate. Each entry must be "train" or "test"; any other value
# makes the generation loop raise ValueError.
phase = ["test"]
# TFR name: the output file is record_file_prefix + "_<phase>.tfrecords".
record_file_prefix = "./data-directory/nosaic_mnist"
#################################################
# Functions
def set_gpu_devices(gpu):
    """Restrict TensorFlow to one GPU and enable memory growth on it.

    Args:
        gpu: index into the module-level `physical_devices` list, which must
            already be defined when this function is called.
    """
    device = physical_devices[gpu]
    gpu_config = tf.config.experimental
    gpu_config.set_visible_devices(device, 'GPU')
    gpu_config.set_memory_growth(device, True)
def np_to_tfr_nosaic_mnist(x, y, writer):
    """Serialize one (video, label) pair and append it to a TFRecord file.

    The record has two features: 'video' (the raw bytes of `x`) and
    'label' (a single int64 value). The writer is not closed here.

    Args:
        x: data: np.ndarray, dtype=uint8
        y: label: int, dtype=int64
        writer: tf.io.TFRecordWriter object. Don't forget writer.close()
            (or use the writer as a context manager).
    """
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    # Make an Example object that has one record data.
    # ndarray.tobytes() yields the same bytes as the deprecated tostring()
    # alias, which was removed in NumPy 2.0.
    example = tf.train.Example(features=tf.train.Features(feature={
        'video': _bytes_feature(x.tobytes()),
        'label': _int64_feature(y)
    }))

    # Serialize the example object and append it to the TFRecord file
    writer.write(example.SerializeToString())
def fix_random_seed(flag_seed, seed=None):
    """Optionally seed NumPy's and TensorFlow's global random generators.

    Args:
        flag_seed: if truthy, both generators are seeded with `seed`;
            otherwise nothing is seeded and a notice is printed.
        seed: value passed to np.random.seed and tf.random.set_seed.
    """
    if not flag_seed:
        print("Random seed not fixed.")
        return

    np.random.seed(seed)
    tf.random.set_seed(seed)
    print("Numpy and TensorFlow's random seeds fixed: seed=" + str(seed))
# Use the first GPU when one is present; otherwise fall back to CPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    print('Found GPU(s) for acceleration.')
    set_gpu_devices(0)  # the first GPU (numbered 0) will be used
else:
    print("No GPU hardware devices are available, running on CPUs...")

# Seed NumPy and TensorFlow so that the random pixel order is reproducible.
fix_random_seed(True, 7)
# Load MNIST as numpy arrays
############################
# batch_size=-1 asks tfds to return each split as one full batch of tensors,
# which .numpy() then converts to np.ndarray.
dstr, dsts = tfds.load(name="mnist", data_dir=data_dir, split=["train", "test"], batch_size=-1)
images_train = dstr["image"].numpy() # (60000, 28, 28, 1), np.uint8
images_test = dsts["image"].numpy() # (10000, 28, 28, 1), np.uint8
labels_train = dstr["label"].numpy() # (60000,), np.int64
labels_test = dsts["label"].numpy() # (10000,), np.int64
for train_or_test in phase:
    print('start working on {} phase.'.format(train_or_test))

    # Pick the MNIST split that this pass converts.
    if train_or_test == "train":
        images_make, labels_make = images_train, labels_train
    elif train_or_test == "test":
        images_make, labels_make = images_test, labels_test
    else:
        raise ValueError(train_or_test)

    record_file = record_file_prefix + "_{}.tfrecords".format(train_or_test)

    # Make training "OR" test data of nosaic MNIST
    ############################################################
    # Refuse to overwrite an existing TFRecord file.
    if os.path.exists(record_file):
        raise ValueError("record_file exists. Remove or rename it.")

    with tf.io.TFRecordWriter(record_file) as writer:
        for cnt, (image, label) in enumerate(zip(images_make, labels_make), start=1):
            # Verbose: report progress every 10000 images.
            if cnt % 10000 == 0:
                print("Iteration {}/{}".format(cnt, len(images_make)))

            # Widen to uint32 so that image + mask is computed without
            # wrapping before it is clipped back to [0, 255].
            image_org = np.uint32(np.reshape(image, (28, 28)))

            # 1. Draw the random order in which pixels are uncovered
            ########################################
            # Frame k uncovers the pixels idx_perm[40*k : 40*k+40]: 40 pixels
            # for k = 0..18 and the remaining 24 for k = 19
            # (40 * 19 + 24 = 784 = 28*28*1).
            idx_perm = np.random.permutation(784)

            # 2. + 3. Update the running mask and synthesize each frame
            ########################################
            # The mask starts at 255 everywhere; uncovered pixels become 0 and
            # stay 0 in all later frames, so the last frame is the clean image.
            running_mask = np.array([255]*784, dtype=np.uint32)
            masked_images = []
            for k in range(20):
                running_mask[idx_perm[40*k: 40*k+40]] = 0
                frame = image_org + np.reshape(running_mask, (28, 28))
                masked_images.append(np.uint8(np.clip(frame, 0, 255)))

            # Stack the frames: from 20 x (28,28) to (20,28,28,1)
            video = np.reshape(masked_images, (20, 28, 28, 1))

            # 4. Save images
            ########################################
            np_to_tfr_nosaic_mnist(x=video, y=np.int64(label), writer=writer)