-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
192 lines (156 loc) · 7.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import mediapipe as mp
import cv2
import numpy as np
import pyautogui
import sys
import time
# Initialization safety settings
# Moving the mouse to a screen corner aborts pyautogui calls with
# FailSafeException — an emergency stop while the script controls the cursor.
pyautogui.FAILSAFE = True
# Global delay (seconds) inserted after every pyautogui call; throttles the
# synthetic mouse events emitted from the tracking loop.
pyautogui.PAUSE = 0.1
def init_camera():
    """Open the default webcam, trying several capture backends in order.

    Returns:
        cv2.VideoCapture: an opened capture that has delivered one test frame.

    Raises:
        RuntimeError: if no backend produces a readable frame.
    """
    # Fix: the docstring promised "different backends" but only one was
    # listed. Prefer the macOS-native AVFoundation backend, then fall back
    # to CAP_ANY so OpenCV picks whatever backend is available.
    backends = [cv2.CAP_AVFOUNDATION, cv2.CAP_ANY]
    for backend in backends:
        print(f"Trying camera with backend {backend}")
        cap = cv2.VideoCapture(0, backend)
        if cap is None or not cap.isOpened():
            print(f"Failed to open camera with backend {backend}")
            continue
        # A device can report "opened" yet fail to deliver frames; verify
        # with a real read before handing the capture to the caller.
        ret, frame = cap.read()
        if ret and frame is not None:
            print("Successfully initialized camera")
            return cap
        # Opened but unreadable: release this handle and try the next backend.
        cap.release()
    raise RuntimeError("Failed to initialize camera with any backend")
def main():
    """Run the hand-gesture mouse controller until ESC or a fatal error.

    Captures webcam frames, tracks one hand with MediaPipe Hands, maps the
    hand position inside a central ROI to screen coordinates, and turns
    pinch gestures into clicks:
      - thumb + index finger pinch -> left click
      - thumb + ring finger pinch  -> right click
    The camera, MediaPipe graph, and OpenCV windows are always released in
    the ``finally`` block.
    """
    # Fix: bind these before the try block so the cleanup code in ``finally``
    # cannot hit a NameError when init_camera() (or anything earlier) raises.
    cap = None
    hands = None
    try:
        # Screen size, used to map ROI-relative positions to cursor positions.
        screen_width, screen_height = pyautogui.size()
        print(f"Screen dimensions: {screen_width}x{screen_height}")

        # MediaPipe hand-tracking helpers.
        mp_hands = mp.solutions.hands
        mp_drawing = mp.solutions.drawing_utils

        print("Initializing camera...")
        cap = init_camera()

        # Read one frame up front to learn the capture resolution.
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Could not read initial frame")
        frame_height, frame_width = frame.shape[:2]
        print(f"Frame size: {frame_width}x{frame_height}")

        # ROI (Region of Interest): the central 60% of the frame in each
        # axis. Only hand positions inside it move the cursor, which keeps
        # the full screen reachable without the hand leaving the camera view.
        left = int(frame_width * 0.2)
        right = int(frame_width * 0.8)
        top = int(frame_height * 0.2)
        bottom = int(frame_height * 0.8)

        # Track a single hand in video (non-static) mode.
        hands = mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        )
        print("Starting main loop...")

        # Click debounce state. Fix: the original shared one click_start_time
        # between the left and right state machines, so performing one
        # gesture reset the other's debounce window; each side now keeps its
        # own timestamp.
        is_left_clicked = False
        is_right_clicked = False
        left_click_time = None
        right_click_time = None
        click_duration = 0.2  # seconds a pinch stays "consumed" before it can re-fire
        click_threshold = 0.1  # normalized thumb-index distance that counts as a pinch
        right_click_threshold = 0.1  # normalized thumb-ring distance for right click

        # On-screen help overlay.
        help_text = [
            "Gestures:",
            "- Bring thumb and index finger close to left click",
            "- Bring thumb and ring finger close to right click",
            "- Move hand to control cursor",
            "Press ESC to exit"
        ]

        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                print("Failed to read frame")
                break

            # Mirror the frame so on-screen motion matches hand motion,
            # then convert BGR -> RGB for MediaPipe.
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            # Draw the ROI boundary and the help overlay.
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            y = 30
            for text in help_text:
                cv2.putText(frame, text, (10, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y += 25

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS
                    )

                    # Cursor anchor: midpoint of wrist and middle-finger tip,
                    # which is steadier than a single fingertip.
                    wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
                    middle_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
                    cursor_x = int((wrist.x + middle_finger_tip.x) / 2 * frame_width)
                    cursor_y = int((wrist.y + middle_finger_tip.y) / 2 * frame_height)

                    # Map ROI coordinates linearly onto the full screen.
                    if left <= cursor_x <= right and top <= cursor_y <= bottom:
                        screen_x = (cursor_x - left) / (right - left) * screen_width
                        screen_y = (cursor_y - top) / (bottom - top) * screen_height
                        try:
                            pyautogui.moveTo(
                                int(screen_x),
                                int(screen_y),
                                duration=0.1,
                                _pause=False
                            )
                        except pyautogui.FailSafeException:
                            print("FailSafe triggered - mouse in corner")

                    # Left click: thumb-index pinch (normalized landmark space).
                    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
                    index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    distance = np.linalg.norm([thumb_tip.x - index_finger_tip.x, thumb_tip.y - index_finger_tip.y])
                    if distance < click_threshold and not is_left_clicked:
                        print("Left click detected")
                        is_left_clicked = True
                        left_click_time = time.time()
                        pyautogui.click(button='left')
                    elif is_left_clicked and time.time() - left_click_time > click_duration:
                        # Re-arm after the debounce window elapses.
                        is_left_clicked = False

                    # Right click: thumb-ring pinch, with its own timer.
                    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
                    distance_right_click = np.linalg.norm([thumb_tip.x - ring_tip.x, thumb_tip.y - ring_tip.y])
                    if distance_right_click < right_click_threshold and not is_right_clicked:
                        print("Right click detected")
                        is_right_clicked = True
                        right_click_time = time.time()
                        pyautogui.click(button='right')
                    elif is_right_clicked and time.time() - right_click_time > click_duration:
                        is_right_clicked = False

            cv2.imshow('Hand Gesture Control', frame)

            # ESC exits the loop.
            if cv2.waitKey(1) & 0xFF == 27:  # ESC
                print("ESC pressed, exiting...")
                break
    except Exception as e:
        # Top-level boundary: report and fall through to cleanup.
        print(f"Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        print("Cleaning up...")
        try:
            # Fix: also close the MediaPipe graph, and guard against the
            # camera never having been opened.
            if hands is not None:
                hands.close()
            if cap is not None:
                cap.release()
            cv2.destroyAllWindows()
            print("Cleanup completed")
        except Exception as e:
            print(f"Error during cleanup: {e}")
if __name__ == "__main__":
    # Log environment details before starting — useful when diagnosing
    # camera-backend or version-specific issues across machines.
    print(f"OpenCV version: {cv2.__version__}")
    print(f"Python version: {sys.version}")
    main()