# mouth_features_extraction.py
# import the necessary packages
from imutils import face_utils
import os
import imutils
import dlib
import cv2
import math
import pickle
PATH_TO_LANDMARK_DETECTOR = "./trained_models/shape_predictor_68_face_landmarks.dat"
TEST_NAME = "test_run"
FOLDER_NAME = "./trained_models/" + TEST_NAME

# Source video (0 for live webcam)
# VideoSource = "test_run.avi"
VideoSource = 0

if not os.path.exists(FOLDER_NAME):
    os.makedirs(FOLDER_NAME)
def calc_geometric_distance(x1, y1, x2, y2):
    # return math.sqrt((x2 - x1)**2 + (y2 - y1)**2)  # Euclidean distance
    return abs(x1 - x2) + abs(y1 - y2)  # Manhattan distance
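# NOTE: the Manhattan distance is presumably used here as a cheaper
# approximation of the Euclidean distance; uncomment the math.sqrt line
# above to switch back to true geometric distances.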
def alignFace(frame, gray, rect, aligner):
    (x, y, w, h) = face_utils.rect_to_bb(rect)
    faceOrig = imutils.resize(frame[y:y + h, x:x + w], width=512)
    faceAligned = aligner.align(frame, gray, rect)
    return faceOrig, faceAligned
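# alignFace returns both the raw face crop (resized to a width of 512)
# and the rotation/scale-normalized face produced by imutils' FaceAligner,
# so callers can choose either view of the detected face.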
# define a dictionary that maps the indexes of the facial
# landmarks to specific face regions
LM = {
    "mouth_outer": (48, 59),
    "mouth_inner": (60, 67),
    "mouth": (48, 68),
    "right_eyebrow": (17, 22),
    "left_eyebrow": (22, 27),
    "right_eye": (36, 42),
    "left_eye": (42, 48),
    "nose": (27, 35),
    "jaw": (0, 17)
}
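# in dlib's 68-point scheme, points 48-59 trace the outer lip contour and
# 60-67 the inner lip contour; note that the loops below treat some of
# these tuples as half-open ranges and others as inclusive ranges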
# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PATH_TO_LANDMARK_DETECTOR)
# initialize the face aligner
aligner = face_utils.FaceAligner(predictor, desiredFaceWidth=256)
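# the FaceAligner rotates and scales each face so that the eyes sit on a
# horizontal line at a fixed output width (256 px here), which keeps the
# mouth distance features comparable across head poses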
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] camera sensor warming up...")
if VideoSource == 0:
    # live capture: open the webcam and set up a recorder for the session
    cap = cv2.VideoCapture(VideoSource)
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    # grab one frame to discover the capture resolution
    ret, frame = cap.read()
    height, width, _ = frame.shape
    out = cv2.VideoWriter(os.path.join(FOLDER_NAME, TEST_NAME + '.avi'),
                          fourcc, 30.0, (width, height))
else:
    # replay a previously recorded session from FOLDER_NAME
    cap = cv2.VideoCapture(os.path.join(FOLDER_NAME, VideoSource))
    # height, width = 256, 256
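# when capturing live, the session is also recorded to
# FOLDER_NAME/TEST_NAME.avi, so the same run can be replayed later by
# pointing VideoSource at that file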
frame_number = -1
global_mouth_feature_list = []
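# each element will be a (feature_vector, frame_number) tuple, so frames
# where no face or landmarks were found simply leave gaps in the numbering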
# loop over the frames from the video stream
while True:
    frame_number += 1
    current_mouth_features = []
    # grab the next frame from the video stream
    ret, frame = cap.read()
    if ret:
        if VideoSource == 0:
            # mirror the live webcam feed
            frame = cv2.flip(frame, flipCode=1)
        # convert the frame to grayscale for detection
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # detect faces in the grayscale frame (0 = no upsampling passes)
        rects = detector(gray, 0)
        if len(rects) > 0:
            # operate on the first (typically the only) detected face
            rect = rects[0]
            faceOrig, faceAligned = alignFace(frame, gray, rect, aligner)
            # re-detect the face on the aligned crop so the landmark
            # distances are measured in the pose-normalized view
            alignedGray = cv2.cvtColor(faceAligned, cv2.COLOR_BGR2GRAY)
            alignedRect = detector(alignedGray, 0)
            if VideoSource == 0:
                out.write(frame)
            if len(alignedRect) > 0:
                alignedRect = alignedRect[0]
                # determine the facial landmarks for the face region, then
                # convert the facial landmark (x, y)-coordinates to a NumPy
                # array
                shape = predictor(alignedGray, alignedRect)
                shape = face_utils.shape_to_np(shape)
                # loop over the (x, y)-coordinates for the facial landmarks
                # and draw them on the image
                # for idx, (x, y) in enumerate(shape):
                #     cv2.putText(frame, str(idx), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255))
                #     cv2.circle(frame, (x, y), 1, (0, 0, 255), -1)
                # distances between consecutive points along the outer lip
                # contour, plus the segment that closes the contour
                for j in range(LM["mouth_outer"][0], LM["mouth_outer"][1]):
                    # cv2.line(frame, (shape[j][0], shape[j][1]), (shape[j + 1][0], shape[j + 1][1]), (255, 255, 255))
                    current_mouth_features.append(calc_geometric_distance(
                        shape[j][0], shape[j][1], shape[j + 1][0], shape[j + 1][1]))
                    if j == LM["mouth_outer"][1] - 1:
                        # cv2.line(frame, (shape[j + 1][0], shape[j + 1][1]), (shape[LM["mouth_outer"][0]][0], shape[LM["mouth_outer"][0]][1]), (255, 255, 255))
                        current_mouth_features.append(calc_geometric_distance(
                            shape[j + 1][0], shape[j + 1][1],
                            shape[LM["mouth_outer"][0]][0], shape[LM["mouth_outer"][0]][1]))
                # all pairwise distances between the inner-lip points
                for j in range(LM["mouth_inner"][0], LM["mouth_inner"][1] + 1):
                    for k in range(LM["mouth_inner"][0], LM["mouth_inner"][1] + 1):
                        # cv2.line(frame, (shape[j][0], shape[j][1]), (shape[k][0], shape[k][1]), (200, 200, 200))
                        current_mouth_features.append(calc_geometric_distance(
                            shape[j][0], shape[j][1], shape[k][0], shape[k][1]))
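                # at this point current_mouth_features holds 12 outer-contour
                # segment lengths plus 8 * 8 = 64 pairwise inner-lip distances
                # (including zero self-distances and symmetric duplicates),
                # i.e. 76 values per frame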
cv2.imshow("test", faceAligned)
global_mouth_feature_list.append((current_mouth_features, frame_number))
# show the frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord(" "):
cv2.imwrite("snapshot.png", frame)
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
else:
break
# persist the per-frame feature vectors for later analysis
print("[INFO] extracted features for %d frames" % len(global_mouth_feature_list))
with open(os.path.join(FOLDER_NAME, TEST_NAME + '.p'), "wb") as f:
    pickle.dump(global_mouth_feature_list, f)
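# to reload the saved features later (usage sketch):
#   with open(os.path.join(FOLDER_NAME, TEST_NAME + '.p'), "rb") as f:
#       features = pickle.load(f)  # list of (feature_vector, frame_number)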
# do a bit of cleanup
if VideoSource == 0:
    out.release()
cap.release()
cv2.destroyAllWindows()