-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
98 lines (81 loc) · 3.2 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from gtts import gTTS
import pyaudio
import wave
import sys
import keyboard
import os
import openai
from pydub import AudioSegment
from pydub.playback import play
from pydub.effects import speedup
# Number of frames requested from the microphone per read.
chunk = 1024
# 16 bits per sample
sample_format = pyaudio.paInt16
# Single (mono) input channel.
chanels = 1
# Record at 44100 samples per second. The previous value (44400) is not a
# standard audio rate and can be rejected by the input device; 44100 Hz is
# the common CD-quality rate.
smpl_rt = 44100
# The OpenAI API key is read from the environment so it never lives in the
# source; os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Resolve the response language from the optional first command-line
# argument. With no argument the assistant defaults to English. Aliases are
# matched case-insensitively, so "DE" or "German" work as well.
_LANGUAGE_ALIASES = {
    "en": "en",
    "english": "en",
    "englisch": "en",
    "de": "de",
    "german": "de",
    "deutsch": "de",
}
_requested_language = sys.argv[1].strip().lower() if len(sys.argv) > 1 else "en"
if _requested_language not in _LANGUAGE_ALIASES:
    print("Language not supported")
    # sys.exit instead of the site-provided exit() helper (which is meant for
    # the interactive shell), with a non-zero status so callers see the failure.
    sys.exit(1)
language = _LANGUAGE_ALIASES[_requested_language]
#function to speak the output
def speak(s):
    """Synthesize *s* with gTTS and play it back at 1.5x speed.

    The text is synthesized in the module-level ``language``, written to the
    intermediate file ``output.mp3``, sped up with pydub, exported to
    ``file.mp3`` and then played. ``output.mp3`` is deleted afterwards.

    Args:
        s: Text to speak. Empty or whitespace-only text is skipped.

    Returns:
        None.
    """
    # Nothing to say: skip synthesis instead of letting gTTS fail on empty text.
    if not s or not s.strip():
        return

    tts_path = "output.mp3"
    try:
        gTTS(text=s, lang=language, slow=False).save(tts_path)
        tts = AudioSegment.from_mp3(tts_path)
        # Positional arguments are playback speed (1.5x) and chunk size (150).
        output = speedup(tts, 1.5, 150)
        # file.mp3 is overwritten on every call and holds the sped-up audio of
        # the most recent utterance; nothing in this file reads it back.
        output.export("file.mp3", format="mp3")
        play(output)
    finally:
        # Remove the intermediate file even if synthesis or playback failed.
        if os.path.exists(tts_path):
            os.remove(tts_path)
def _record_while_held(pa, key="r"):
    """Capture microphone audio for as long as *key* stays pressed.

    Args:
        pa: An open ``pyaudio.PyAudio`` instance.
        key: Name of the push-to-talk key.

    Returns:
        A list of raw audio buffers; empty if the key was not held when
        recording started.
    """
    stream = pa.open(format=sample_format, channels=chanels,
                     rate=smpl_rt, input=True,
                     frames_per_buffer=chunk)
    frames = []
    try:
        while keyboard.is_pressed(key):
            frames.append(stream.read(chunk))
    finally:
        # Always release the input stream, even if a read fails.
        stream.stop_stream()
        stream.close()
    return frames


def _write_wav(path, frames, sample_width):
    """Write the recorded *frames* to *path* as a WAV file."""
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(chanels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(smpl_rt)
        wav_file.writeframes(b"".join(frames))


def _transcribe(path):
    """Send the audio file at *path* to Whisper and return the recognized text."""
    # The context manager closes the upload handle once the request is done.
    with open(path, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", audio_file)
    return transcription["text"]


if __name__ == "__main__":
    # Push-to-talk loop: hold "r" to record a question, release it to have the
    # recording transcribed, answered by the chat model and spoken aloud.
    message_history = [{"role": "system", "content": "You are Karin, an AI assistant. You give short answers to questions and never say more then 3 sentences."}]
    # Create an interface to PortAudio
    pa = pyaudio.PyAudio()
    try:
        if language == "de":
            speak("Hallo, ich bin Karin. Wie kann ich dir helfen?")
        else:
            speak("Hello, I am Karin. How can I help you?")

        while True:
            # Block until a key event arrives; only "r" starts a recording.
            if keyboard.read_key() != "r":
                continue

            print('Recording...')
            frames = _record_while_held(pa)
            if not frames:
                # Nothing was captured (the key was already released), so
                # skip instead of uploading an empty recording.
                continue

            # Save the recording as a WAV file.
            print('processing...')
            _write_wav("x.wav", frames, pa.get_sample_size(sample_format))

            # Transcribe the audio file.
            print("Transcribe...")
            user_text = _transcribe("x.wav")
            message_history.append({"role": "user", "content": user_text})
            print(user_text)

            # Ask the chat model, passing the whole conversation so far.
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=message_history)
            reply = completion['choices'][0]['message']['content']
            # Model replies are stored with the "assistant" role; "system" is
            # reserved for instructions such as the first message above.
            message_history.append({"role": "assistant", "content": reply})

            # Print the answer and speak it.
            print("\n ------------------ \n")
            print(reply)
            speak(reply)
    finally:
        # Release PortAudio when the loop ends (e.g. Ctrl+C or an API error).
        pa.terminate()