face_model.py
import cv2
import numpy as np
from ultralytics import YOLO
import threading


class FaceDetection:
    def __init__(self, model_path, label_path, resolution, os_type="windows", camera_index=0):
        self.model = YOLO(model_path)
        self.class_list = self.load_labels(label_path)
        # Select the capture backend appropriate for the host OS
        if os_type == "windows":
            self.cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
        elif os_type == "linux":
            self.cap = cv2.VideoCapture(camera_index, cv2.CAP_V4L2)
        else:
            # macOS
            self.cap = cv2.VideoCapture(camera_index, cv2.CAP_AVFOUNDATION)
        # Configure capture resolution and frame rate
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])
        self.cap.set(cv2.CAP_PROP_FPS, 30)
        self.is_running = True
        self.frame = None
        self.lock = threading.Lock()
        self.batch_size = 5
        self.frames = []
        self.current_zoom_factor = 1.0
    def load_labels(self, label_path):
        '''
        Load class labels from a file.
        '''
        with open(label_path, "r") as file:
            data = file.read().strip()
        return data.split("\n")
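    # For reference, the label file is assumed to be plain text with one
    # class name per line, e.g. (hypothetical contents of labels.txt):
    #
    #   face
    #   person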
    def capture_frame(self):
        '''
        Capture a single frame from the video feed.
        '''
        success, frame = self.cap.read()
        if success:
            return frame
        else:
            print("Error: Unable to capture frame")
            return None
    def process_frames(self, frames):
        '''
        Run the YOLO model on a batch of frames. With stream=True the model
        returns a lazy generator per frame, so inference happens when the
        results are iterated (e.g. in detect_faces).
        '''
        batch_results = []
        for frame in frames:
            results = self.model(frame, stream=True)
            batch_results.append(results)
        return batch_results
    def detect_faces(self, frame, results):
        '''
        Draw bounding boxes and confidence labels for detected faces on the
        frame, and return the annotated frame with the box coordinates.
        '''
        all_boxes = []
        for r in results:
            boxes = r.boxes
            if not boxes:
                continue
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                all_boxes.append((x1, y1, x2, y2))
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 2)
                cls = int(box.cls[0])
                confidence = round(float(box.conf[0].cpu()), 2)
                label = f"{self.class_list[cls]}: {confidence}"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1, cv2.LINE_AA)
        return frame, all_boxes
    def get_face_centers(self, all_boxes):
        '''
        Get the center coordinates of all detected faces.
        '''
        centers = []
        for (x1, y1, x2, y2) in all_boxes:
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            centers.append((center_x, center_y))
        return centers
    def calculate_zoom(self, all_boxes, frame_shape):
        '''
        Calculate the zoom factor and zoom center from the combined bounding
        box of all detected faces.
        '''
        if not all_boxes:
            # No faces detected: keep the current zoom, centered on the frame
            return frame_shape[1] // 2, frame_shape[0] // 2, self.current_zoom_factor
        target_face_size = 0.4   # desired box height as a fraction of frame height
        zoom_threshold = 0.01    # minimum difference before adjusting the zoom
        smoothing_factor = 0.05  # fraction of the remaining gap closed per call
        min_x = min(x1 for x1, _, _, _ in all_boxes)
        min_y = min(y1 for _, y1, _, _ in all_boxes)
        max_x = max(x2 for _, _, x2, _ in all_boxes)
        max_y = max(y2 for _, _, _, y2 in all_boxes)
        face_center_x = (min_x + max_x) // 2
        face_center_y = (min_y + max_y) // 2
        box_height = max_y - min_y
        actual_box_size = box_height / frame_shape[0]
        desired_zoom_factor = self.current_zoom_factor
        if actual_box_size < target_face_size:
            desired_zoom_factor = min(1.5, self.current_zoom_factor * (target_face_size / actual_box_size))
        elif actual_box_size > target_face_size:
            desired_zoom_factor = max(0.5, self.current_zoom_factor * (target_face_size / actual_box_size))
        if abs(desired_zoom_factor - self.current_zoom_factor) > zoom_threshold:
            self.current_zoom_factor += smoothing_factor * (desired_zoom_factor - self.current_zoom_factor)
        # Clamp the zoom so it is neither too close nor too far
        self.current_zoom_factor = min(max(self.current_zoom_factor, 0.75), 1.5)
        return face_center_x, face_center_y, self.current_zoom_factor
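    # Worked example (illustrative numbers, not from the original file): with a
    # 720-pixel-tall frame and a combined face box 180 px tall, actual_box_size
    # is 0.25, below the 0.4 target, so the desired zoom is min(1.5, 1.0 * 1.6)
    # = 1.5. The smoothing step then moves current_zoom_factor by
    # 0.05 * (1.5 - 1.0) = 0.025 per call, easing toward 1.5 over many frames.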
    def smooth_zoom(self, frame, center_x, center_y, zoom_factor):
        '''
        Smoothly zoom into the center of the frame.
        '''
        h, w, _ = frame.shape
        new_w = int(w / zoom_factor)
        new_h = int(h / zoom_factor)
        x1 = max(0, min(center_x - new_w // 2, w - new_w))
        y1 = max(0, min(center_y - new_h // 2, h - new_h))
        x2 = min(w, x1 + new_w)
        y2 = min(h, y1 + new_h)
        cropped_frame = frame[y1:y2, x1:x2]
        resized_frame = cv2.resize(cropped_frame, (w, h))
        return resized_frame
    def calculate_zoomed_in_box(self, frame, center_x, center_y, zoom_factor):
        '''
        Calculate the bounding box for the zoomed-in region (the same crop
        that smooth_zoom applies).
        '''
        h, w, _ = frame.shape
        new_w = int(w / zoom_factor)
        new_h = int(h / zoom_factor)
        x1 = max(0, min(center_x - new_w // 2, w - new_w))
        y1 = max(0, min(center_y - new_h // 2, h - new_h))
        x2 = min(w, x1 + new_w)
        y2 = min(h, y1 + new_h)
        return x1, y1, x2, y2
    def calculate_offsets(self, all_boxes, frame_shape):
        '''
        Calculate the offset of the combined face center from the frame center.
        '''
        frame_center_x = frame_shape[1] // 2
        frame_center_y = frame_shape[0] // 2
        if not all_boxes:
            return (0, 0)
        min_x = min(x1 for x1, _, _, _ in all_boxes)
        min_y = min(y1 for _, y1, _, _ in all_boxes)
        max_x = max(x2 for _, _, x2, _ in all_boxes)
        max_y = max(y2 for _, _, _, y2 in all_boxes)
        combined_center_x = (min_x + max_x) // 2
        combined_center_y = (min_y + max_y) // 2
        offset_x = combined_center_x - frame_center_x
        offset_y = combined_center_y - frame_center_y
        return offset_x, offset_y
    def release_resources(self):
        '''
        Release the video capture and destroy all OpenCV windows.
        '''
        self.cap.release()
        cv2.destroyAllWindows()
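
A minimal usage sketch of how the pieces fit together: capture a frame, run detection, compute the auto-zoom from the detected boxes, and display the result. The model path "yolo_face.pt", label file "labels.txt", and the 1280x720 resolution are illustrative placeholders, not values from this repository.

# Usage sketch (added for illustration; not part of the original file)
if __name__ == "__main__":
    # Hypothetical paths and resolution; substitute your own
    detector = FaceDetection("yolo_face.pt", "labels.txt", (1280, 720), os_type="windows")
    try:
        while detector.is_running:
            frame = detector.capture_frame()
            if frame is None:
                break
            # process_frames returns one lazy result generator per frame
            results = detector.process_frames([frame])[0]
            frame, boxes = detector.detect_faces(frame, results)
            center_x, center_y, zoom = detector.calculate_zoom(boxes, frame.shape)
            frame = detector.smooth_zoom(frame, center_x, center_y, zoom)
            cv2.imshow("face_model", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        detector.release_resources()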