forked from danduncan/HappyNet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gather_training_data.py
310 lines (249 loc) · 8.65 KB
/
gather_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#############################################################################################
#
# This is a program used to generate your own custom dataset of labeled emotions.
# It uses the webcam and prompts the user to make an emotion.
# When the user is ready, they press ENTER, and the webcam saves a snapshot of their emotion.
# All files are saved using the Japanese Female Facial Expressions (JAFFE) dataset naming
# convention.
#
# Instructions:
# - Execute as python script
# - If working properly, a window will pop up with a feed from your webcam
# Note: This does not appear to work on virtual machines. It was used on a Macbook.
# - Follow the prompts on the shell screen
# - When pressing a button, make sure the video screen is selected, not the text screen.
# The video screen is the one taking your text inputs
#
# Possible text inputs:
# ENTER - save image
# SPACE - Skip to next emotion without saving
# ESC - Quit the program
#
#
# Date modified: March 2016
#
# Authors: Dan Duncan
# Gautam Shine
#
#############################################################################################
import os, shutil, sys, time, re, glob
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import Image
import caffe
import contextlib, cStringIO
import random
from caffe_functions import *
from opencv_functions import *
from utility_functions import *
from PIL import Image
#############################################################################################
#
# HELPER FUNCTIONS
#
#############################################################################################
# Filename format looks like:
# training_data/0000000.HA.0.png
# outDir is self-explanatory
# counter gets converted into string of length 7, with leading 0's
# Label is the two-character emotion label
# subCount is a number label for jittered images
# Extension is the filetype
def getFilename(counter,subCount=0,outDir=None,strLength=7,label='HA',extension='.png'):
    """Assemble an output filename of the form ``<outDir>/<number>.<label>.<subCount><extension>``.

    Example result: ``training_data/0000000.HA.0.png``.

    counter   -- image number; converted to a string and left-padded with
                 zeros to at least ``strLength`` characters
    subCount  -- number label for jittered images; pass ``None`` to leave
                 this field (and its leading dot) out of the name
    outDir    -- directory prefix; ``None`` produces a bare filename,
                 anything else is followed by a ``/``
    strLength -- minimum width of the zero-padded number field
    label     -- two-character emotion label
    extension -- filetype suffix, including its leading dot

    Returns the assembled filename as a string.
    """
    dirPrefix = "" if outDir is None else outDir + '/'
    subField = "" if subCount is None else "." + str(subCount)
    numField = str(counter).zfill(strLength)
    return "".join([dirPrefix, numField, ".", label, subField, extension])
# Suppress print statements within a function call
# Just call:
# with nostdout():
# yourfunction();
@contextlib.contextmanager
def nostdout():
    """Context manager that discards everything printed inside the ``with`` body.

    ``sys.stdout`` is swapped for an in-memory buffer on entry and the
    previous stream is put back on exit. The restore happens in a
    ``finally`` clause so stdout comes back even when the body raises;
    the exception itself still propagates to the caller.

    Usage:
        with nostdout():
            yourfunction()
    """
    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    save_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        yield
    finally:
        # Always restore, otherwise one failing call silences the whole program
        sys.stdout = save_stdout
# Get a random emotion label
def getRandomLabel(pickFrom=None):
    """Return one randomly chosen integer emotion label.

    pickFrom -- sequence of labels to choose from; when ``None`` the
                choice is made among the seven labels 0 through 6

    The draw uses the ``random`` module, so it follows ``random.seed``.
    """
    candidates = pickFrom
    if candidates is None:
        candidates = list(range(7))
    return random.choice(candidates)
# Convert an (already cropped) image and save it to file
def saveSingleImage(frame,file):
    """Write one image array to disk.

    frame -- image array; it is passed through ``toggleRGB`` (a helper
             imported from elsewhere in the project, presumably swapping
             the channel order between BGR and RGB -- TODO confirm)
             before being handed to ``Image.fromarray``
    file  -- destination filename given to ``Image.save``
    """
    # The cropbox could also be rescaled at this point
    rgbFrame = toggleRGB(frame)
    Image.fromarray(rgbFrame).save(file)
# Crop and save image, including adding jitter
def saveAcceptedImage(frame,faces,counter,outDir=None,strLength=7,label='HA',extension='.png',jitter=False):
    """Crop the detected face out of ``frame`` and save it, optionally with jittered copies.

    frame     -- full input image
    faces     -- detected face boxes; only ``faces[0]`` is used
    counter   -- image number used in the output filename
    outDir    -- output directory (see ``getFilename``)
    strLength -- width of the zero-padded number in the filename
    label     -- two-character emotion label written into the filename
    extension -- output filetype, including the leading dot
    jitter    -- when true, save every image returned by ``jitterImage``;
                 otherwise save the single crop returned by ``imgCrop``
                 (a helper imported from elsewhere in the project)

    Each saved image gets its own sub-count in the filename, starting at 0.
    """
    if jitter:
        crops = jitterImage(frame,faces)
    else:
        crops = [imgCrop(frame,faces[0])]

    for subCount, crop in enumerate(crops):
        # Use the ``label`` argument here. The previous code read the
        # script-level global ``suf`` and ignored what the caller passed in.
        filename = getFilename(counter,outDir=outDir,subCount=subCount,strLength=strLength,label=label,extension=extension)
        saveSingleImage(crop,filename)
# Jitter an image
# Returns several jittered versions of the input image
def jitterImage(frame,faces):
    """Return several jittered crops of the face found in ``frame``.

    frame -- image array indexed as ``frame[row, column, ...]``
    faces -- detected face boxes; ``faces[0]`` is unpacked as ``x, y, w, h``

    The returned list holds, in order:
      1. the unjittered crop ``frame[y:y+h, x:x+w]``
      2. up to 4 crops whose box is displaced by less than 10% of the box
         size in x and y (only displacements that stay inside the frame
         are kept)
      3. 6 copies of the unjittered crop with every pixel brightened or
         darkened by a uniform offset of at most 30, clipped to 0..255
    """
    # Define constants
    numShiftMax = 4      # Number of shifted images to produce
    numColorMax = 6      # Number of color-shifted images to produce
    maxShift = 0.1       # Maximum displacement, as a fraction of the cropbox size
    maxColorShift = 30   # Maximum raw pixel-value offset
    maxAttempts = 100 * numShiftMax  # Give up on shifting after this many tries

    # Frame width and height
    fh, fw = frame.shape[0], frame.shape[1]
    x,y,w,h = faces[0]

    # Original unjittered image. The width must be w; the old code used h.
    baseCrop = frame[y:y+h,x:x+w]
    frames = [baseCrop]  # Will hold output jittered images

    # Shift image by up to 10% of cropbox size in each direction.
    # The attempt limit keeps this from looping forever when the box is so
    # close to the frame size that no shifted box can ever fit.
    shiftCount = 0
    attempts = 0
    while shiftCount < numShiftMax and attempts < maxAttempts:
        attempts += 1
        # Generate shifts: -0.1 < shift < .1
        xshift = np.random.uniform(0.0,maxShift*2) - maxShift
        yshift = np.random.uniform(0.0,maxShift*2) - maxShift

        # Apply shifts
        xt = x + int(xshift*w)
        yt = y + int(yshift*h)

        # Keep the shifted box only if it is within the frame limits
        if xt >= 0 and yt >= 0 and xt+w < fw and yt+h < fh:
            frames.append(frame[yt:yt+h,xt:xt+w])
            shiftCount += 1

    # Brighten or darken image uniformly. Raw pixel values are 0 to 255.
    # NOTE(review): the previous code cropped these images with whatever
    # xt/yt the shift loop happened to leave behind; they are now taken
    # from the detected face box itself.
    for _ in range(numColorMax):
        # Symmetric offset in [-maxColorShift, maxColorShift]
        shift = random.randint(-maxColorShift,maxColorShift)
        # Widen before adding so the sum cannot wrap around in uint8
        shifted = baseCrop.astype(np.int32) + shift
        frames.append(np.clip(shifted,0,255).astype(np.uint8))

    return frames
################################################################################################
#
# START SCRIPT
#
#################################################################################################
# Pick mode (train or validate)
validationMode = False

# Pick output size in pixels, of all cropped images (images are all square)
imgSize = 200   # NOTE(review): not referenced anywhere else in this script
boxScale = 1.2  # Size of crop boxes (relative to original filter size)
jitter = True   # Jitter accepted images? (overridden by the mode selection below)

# Initialize all labels. categories[i] and suffixes[i] describe the same emotion.
categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
suffixes = [ 'AN', 'DI', 'FE', 'HA', 'NE', 'SA', 'SU']
pickFrom = [ 2, 4, 5 ] # Only prompt user for emotions in this list

# Validation images are saved unjittered; training images are jittered
if validationMode:
    jitter = False
    outDir = 'datasets/validation_images'
else:
    jitter = True
    outDir = 'datasets/training_images'
counter = 300 # Used to increment filenames

# Output filename configuration:
strLength = 7      # Length of output filename number string
extension = '.png' # Output file type

# Check that outDir and counter are properly initialized
print("\n")
if not os.path.exists(outDir):
    print("Output directory does not exist. Making directory")
    # makedirs also creates the missing parent ('datasets/');
    # os.mkdir would fail on the nested default path
    os.makedirs(outDir)
else:
    print("Output directory already exists")

# Advance counter until no existing file (with any emotion label) uses it
numCheck = 1000 # Number of filenames to check before giving up
num = 0
while True:
    strCheck = getFilename(counter,outDir=outDir,strLength=strLength,label="*",extension=extension)
    print("Checking: " + strCheck)
    if not glob.glob(strCheck):
        print("First valid file is: " + strCheck)
        break
    print("\tError: File exists. Incrementing counter")
    counter += 1
    num += 1
    if num > numCheck:
        print("ERROR: No available filename up to " + strCheck + " could be found.")
        sys.exit(1) # Non-zero status: this is a failure

# Set up face detection
faceCascades = load_cascades()

# Set up display window
cv.namedWindow("preview")

# Open input video stream
vc = cv.VideoCapture(0)

try:
    # Check that video stream is running
    if vc.isOpened(): # try to get the first frame
        rval, frame = vc.read()
    else:
        rval = False
        print("Error: could not open video stream.")

    print("\n")
    nextEmotion = True
    while rval:
        if nextEmotion: # Generate next emotion
            nextEmotion = False
            # Generate a random integer label
            intLabel = getRandomLabel(pickFrom)
            # Get emotion string and file suffix.
            # ``suf`` must stay a module-level name: other code in this
            # file may read it as a global.
            emotion = categories[intLabel]
            suf = suffixes[intLabel]
            # Print prompt to user:
            print("Emotion is: " + emotion + ".\t(ENTER to capture, SPACE to skip)")

        # Read in next frame; stop cleanly if the camera stops delivering
        rval, frame = vc.read()
        if not rval or frame is None:
            print("Error: could not read frame from video stream. Quitting...")
            break

        # Mirror image
        frame = np.fliplr(frame)

        # Find all faces (detector output is silenced)
        with nostdout():
            newFrame, faces = DetectFace(frame,True,faceCascades,single_face=False,second_pass=False,draw_rects=True,scale=boxScale)

        # An image can only be accepted when exactly one face was found
        oneFace = faces is not None and len(faces) == 1

        # Show video with or without boxed face
        cv.imshow("preview", newFrame)

        # Wait for user to press key. The mask drops any high bits some
        # OpenCV builds add to key codes; "no key" (-1) becomes 255.
        key = cv.waitKey(20) & 0xFF
        if key == 27: # ESC --> exit on ESC
            print('ESC was pressed! Quitting...')
            break
        elif key == 32: # SPACE --> Next image
            print('Label skipped')
            nextEmotion = True
            continue
        elif key in (13, 10): # ENTER (reported as CR or LF) --> Accept image
            if not oneFace:
                print("Error: ENTER pressed, but face invalid. Keep trying...")
                print("Emotion is: " + emotion + ".\t(ENTER to capture, SPACE to skip)")
            else:
                saveAcceptedImage(frame,faces,counter,outDir=outDir,strLength=strLength,label=suf,extension=extension,jitter=jitter)
                print('Image accepted and saved!')
                counter += 1
                nextEmotion = True
                continue
        # Any other key (or no key) is ignored
finally:
    # Free the camera and close the window even if something above failed
    vc.release()
    cv.destroyWindow("preview")