-
Notifications
You must be signed in to change notification settings - Fork 25
/
yolov10_cam_speak.py
58 lines (48 loc) · 1.27 KB
/
yolov10_cam_speak.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/home/user/venv/bin/python
## pip install ultralytics
## pip install deep-translator
## pip install gtts
import os
import cv2
from ultralytics import YOLO
from deep_translator import GoogleTranslator
from gtts import gTTS
# Load the detector from yolov10n.pt once at start-up; the capture loop below
# reuses this single instance for every frame.
model = YOLO("yolov10n.pt")
def Speak(text, tl):
    """Translate *text* into language *tl*, then synthesize and play it.

    The translated string is printed, rendered by gTTS into ``gTTS.mp3``
    in the current working directory, and played with the ``mpg123``
    command-line player.

    Args:
        text: Sentence to announce; the source language is auto-detected.
        tl: Target language code, used for both translation and speech.
    """
    translator = GoogleTranslator(source="auto", target=tl)
    translated = translator.translate(text=text)
    print(translated)

    # Synthesize the speech into the fixed file gTTS.mp3.
    gTTS(translated, lang=tl).save("gTTS.mp3")

    # os.system waits for the player, so this returns once playback ends.
    os.system("mpg123 -q gTTS.mp3")
# Webcam loop: run detection on each captured frame, announce how many
# objects of each class were found, and show the annotated frame.
# Keys: 's' saves the annotated frame and repeats the announcement,
# Esc (key code 27) quits.
TARGET_LANG = "zh-TW"

cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing, unplugged, or the
            # stream ended); stop rather than hand a missing frame to the
            # detector.
            break
        # To mirror the preview, uncomment: frame = cv2.flip(frame, 1)

        ## Object Detection
        results = model(frame)

        ## Object Counting
        labels = results[0].names
        cls = results[0].boxes.cls.tolist()
        # Tally class ids in a single pass; dict insertion order keeps the
        # classes in first-seen order, as the announcement expects.
        counts = {}
        for class_id in cls:
            counts[class_id] = counts.get(class_id, 0) + 1
        text = "There are " + "".join(
            str(count) + " " + labels[int(class_id)] + ","
            for class_id, count in counts.items()
        )
        # NOTE: Speak waits for audio playback to finish, so the loop
        # advances by one frame per announcement.
        Speak(text, TARGET_LANG)

        # The annotated frame is written to out.jpg and read back for display.
        results[0].save("out.jpg")
        img = cv2.imread("out.jpg")
        cv2.imshow('webcam', img)

        k = cv2.waitKey(1) & 0xFF
        if k == ord('s'):
            cv2.imwrite("detected.jpg", img)
            Speak(text, TARGET_LANG)
        if k == 27:
            break
finally:
    # Always free the camera and close the preview window, even when the
    # detector, translator or TTS call raises mid-loop.
    cap.release()
    cv2.destroyAllWindows()