-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
89 lines (75 loc) · 3.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import torch
from pynput import keyboard
import SpeechToText_OnKlickClass
import sounddevice as sd
def on_press(key):
    """Handle one key-press event from the pynput keyboard listener.

    Character keys are only echoed. ESC closes the module-level
    ``SpeechToText`` object and stops the listener. F4 toggles between
    starting a recording and closing it, tracked by the module-level
    ``key_pressed`` flag.

    Args:
        key: The key object passed by ``keyboard.Listener``. Keys that carry
            a non-``None`` ``char`` attribute are treated as character keys;
            everything else is treated as a special key.

    Returns:
        ``False`` after ESC was handled (which tells the listener to stop),
        otherwise ``None``.
    """
    global key_pressed

    # getattr with a default cannot raise, so classifying the key needs no
    # try/except at all.
    char = getattr(key, 'char', None)
    if char is not None:
        print(f'Character key pressed: {char}')
        return None

    print(f'Special key pressed: {key}')
    try:
        if key == keyboard.Key.esc:
            print("shutting down")
            SpeechToText.close()
            return False
        if key == keyboard.Key.f4:
            if not key_pressed:
                key_pressed = True
                print("Start transcribing...")
                SpeechToText.start_recording_multiple(10)
            else:
                print("Stop transcribing ...")
                key_pressed = False
                SpeechToText.close()
    except AttributeError as err:
        # Still swallowed so the listener keeps running (as before), but the
        # failure is reported for what it is instead of being printed as just
        # another "Special key pressed" line.
        print(f'Error while handling {key}: {err!r}')
    return None
def parse_device_selection(text, device_count):
    """Parse a whitespace-separated list of input-device numbers.

    Args:
        text: Raw user input, e.g. ``"0 3"``.
        device_count: Number of devices that were listed to the user; valid
            numbers are ``0 .. device_count - 1``.

    Returns:
        The selected device numbers as a list of ints, in the order typed.

    Raises:
        ValueError: If ``text`` contains no numbers, contains a token that is
            not an integer, or names a device number outside the listed range.
    """
    selection = [int(token) for token in text.split()]
    if not selection:
        # "".split() is [], which would otherwise select no device at all.
        raise ValueError("no device number given")
    out_of_range = [n for n in selection if not 0 <= n < device_count]
    if out_of_range:
        raise ValueError(f"unknown device number(s): {out_of_range}")
    return selection


if __name__ == '__main__':
    # Toggle state read and written by on_press(): True while F4 has started
    # a recording that has not been stopped yet.
    key_pressed = False

    device1 = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    # Only address a second GPU when one actually exists; otherwise reuse
    # device1 (device_count() is 0 when CUDA is unavailable).
    device2 = torch.device("cuda:1") if torch.cuda.device_count() > 1 else device1

    print("Start")
    # Module-level name on purpose: on_press() uses this object.
    SpeechToText = SpeechToText_OnKlickClass.SpeechToText(device1, True)
    print("Done Loading AIs: Start")

    # File mode: keep transcribing paths until the input is not an existing path.
    while True:
        user_input = input(
            "if you want to transcribe a soundfile or all soundfiles within a folder, type a valid file/folerpath, otherwise push the ANY button. ")
        if not os.path.exists(user_input):
            break
        file_path = user_input
        print(f"Transcribing the file at {file_path}...")
        SpeechToText.transcribeFile(file_path, 15)
        print("Transcription complete!")

    print("Interactive mode:")
    user_input = input(
        "to display and select one or more input devices press 'i'. To continue with default recording device push the ANY button. ")
    if user_input.lower() == "i":
        devices = sd.query_devices()
        for i, device in enumerate(devices):
            print(
                f"Device {i}: {device['name']} - Input Channels: {device['max_input_channels']}, Output Channels: {device['max_output_channels']}")
        while True:
            devices_input = input(
                "type the device numbers that should jointly record and transcribe (example: '0 3' without ' ) ")
            try:
                devicesN = parse_device_selection(devices_input, len(devices))
                # Kept inside the try so a ValueError raised while applying
                # the selection re-prompts, exactly as before.
                SpeechToText.set_input_devices(devicesN)
            except ValueError as err:
                print(f"Invalid input. Please enter numbers separated by spaces. ({err})")
            else:
                print("Selected:", devicesN)
                break
    else:
        # No explicit selection: call without arguments.
        SpeechToText.set_input_devices()

    print("Key listener is active. Start/Pause transcription by pressing f4. Close program with ESC")
    # Block here until on_press() returns False (ESC) and the listener stops.
    with keyboard.Listener(on_press=on_press) as listener:
        listener.join()