-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
220 lines (193 loc) · 10.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
from telegram import Update
from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler, MessageHandler, filters
import vocode
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.stream_elements_synthesizer import StreamElementsSynthesizer
from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.turn_based.synthesizer.play_ht_synthesizer import PlayHtSynthesizer
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.synthesizer.coqui_synthesizer import CoquiSynthesizer
from vocode.turn_based.synthesizer.coqui_tts_synthesizer import CoquiTTSSynthesizer
from vocode.turn_based.synthesizer.rime_synthesizer import RimeSynthesizer
from pydub import AudioSegment
import io
import logging
import os
import subprocess
import inspect
from typing import Tuple
# Log timestamp, logger name, level and message for everything at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Synthesizer class must be one of the following (do not change without adding support):
supported_classes = [
    StreamElementsSynthesizer,
    ElevenLabsSynthesizer,
    PlayHtSynthesizer,
    AzureSynthesizer,
    CoquiSynthesizer,
    CoquiTTSSynthesizer,
    RimeSynthesizer,
]

# ----- CONFIGURATION START -----
# Required environment variables containing API keys: OPENAI_API_KEY,
# TELEGRAM_BOT_KEY, and the API key variable that corresponds to your chosen
# Vocode synthesizer class (ELEVEN_LABS_API_KEY for the default below).

# Your chosen synthesizer provider's corresponding Vocode turn_based class
# instance. The voice id passed here is the initial voice; it should be one of
# the VOICE_NAME_TO_ID values, because the responder maps the synthesizer's
# current voice id back to a display name.
SYNTH = ElevenLabsSynthesizer(
    'mqRQRcVDz2T5TcqF5vGi',
    api_key=os.environ['ELEVEN_LABS_API_KEY'],
)

# A dict of voices you want the user to be able to choose from; for each entry
# key=name_displayed_to_user and value=provider_voice_id.
# The voice display name must not contain spaces, and two names must not be
# equivalent on a case-insensitive basis (the user is still shown the
# capitalization but can select names case-insensitively). The language model
# will also be given the display name as part of its identity.
VOICE_NAME_TO_ID = {
    'BarackObama': 'zzHoZ9isaKwC4fEv7Idf',
    'JoeBiden': 'mqRQRcVDz2T5TcqF5vGi',
}

# Instructions to the language model responsible for generating the response transcript.
SYSTEM_PROMPT = 'You are having a voice-to-voice conversation with a user. Always pretend you are a real human. Do not avoid or decline to answer questions because you are not human.'
# ----- CONFIGURATION END -----

# Identity suffix; '{0}' is filled with the voice display name via str.format.
# The leading space keeps it from running into the last sentence of the
# configured prompt ("...not human.You are ...").
SYSTEM_PROMPT += ' You are {0}. Act like {0} and identify yourself as {0}.'
def assert_correct_names(names):
    """Validate the voice display names offered to users.

    A name is rejected if it contains a space character or if it equals an
    earlier name when both are lower-cased.

    The checks raise ``AssertionError`` explicitly instead of using ``assert``
    statements, so they still run when Python is started with ``-O`` (which
    strips ``assert``).

    Args:
        names: Iterable of display-name strings.

    Raises:
        AssertionError: On the first name that contains a space or duplicates
            an earlier name case-insensitively.
    """
    seen_lower = set()
    for name in names:
        if ' ' in name:
            raise AssertionError('Voice display names cannot have spaces!')
        lower_name = name.lower()
        if lower_name in seen_lower:
            raise AssertionError(
                'Voice display names cannot have duplicates, this requirement is case-insensitive!'
            )
        seen_lower.add(lower_name)
# Validate the configured display names (iterating the dict yields its keys).
assert_correct_names(VOICE_NAME_TO_ID)

# Maps each supported synthesizer class to the name of the attribute /
# constructor parameter that selects the provider voice.
voice_attr_of = {
    ElevenLabsSynthesizer: 'voice_id',
    PlayHtSynthesizer: 'voice',
    StreamElementsSynthesizer: 'voice',
    AzureSynthesizer: 'voice_name',
    CoquiSynthesizer: 'voice_id',
    CoquiTTSSynthesizer: 'speaker',
    RimeSynthesizer: 'speaker'
}

# The table and the supported-class list must describe exactly the same classes.
assert set(voice_attr_of) == set(supported_classes), 'supported_classes must match the keys of voice_attr_of!'
# The configured synthesizer must be an instance of exactly one of those classes.
assert type(SYNTH) in voice_attr_of, 'Synthesizer class must be one of the supported ones!'

# Sanity-check the table: every listed attribute name has to appear as a
# parameter of the corresponding class's __init__.
for synth_cls, attr_name in voice_attr_of.items():
    init_params = inspect.signature(synth_cls.__init__).parameters
    assert attr_name in init_params
class VocodeBotResponder:
    """Telegram handlers that answer voice messages with synthesized speech.

    A voice message is transcribed, passed to a per-chat, per-voice
    ``ChatGPTAgent``, and the agent's reply is synthesized and sent back as
    both text and audio.

    The selected voice is stored on the single synthesizer instance held by
    this object, so changing it with ``/voice`` affects every chat served by
    the same responder.
    """

    def __init__(self, transcriber, system_prompt, synthesizer, voice_name_to_id=None):
        """Store the pipeline components and build the voice lookup tables.

        Args:
            transcriber: Object whose ``transcribe(audio)`` returns the transcript.
            system_prompt: Prompt template; ``str.format`` is called on it with
                the voice display name when an agent is created.
            synthesizer: Instance of one of the classes keyed in ``voice_attr_of``.
            voice_name_to_id: Optional mapping of display name -> provider
                voice id. When omitted or empty, the synthesizer's current
                voice is used as the only entry (name and id both equal to it).
        """
        self.transcriber = transcriber
        self.system_prompt = system_prompt
        self.synthesizer = synthesizer
        self.chat_ids = []
        if voice_name_to_id:
            self.voice_name_to_id = voice_name_to_id
        else:
            # voice_attr_of is keyed by synthesizer *class*, so look it up via
            # type(); indexing with the instance itself raised KeyError.
            voice = getattr(self.synthesizer, self._voice_attr_name())
            self.voice_name_to_id = {voice: voice}
        # Reverse lookup used to report which voice is currently active.
        self.voice_id_to_name = {
            v_id: v_name for v_name, v_id in self.voice_name_to_id.items()
        }
        # chat_id -> {voice display name -> ChatGPTAgent}
        self.agents = {}

    def _voice_attr_name(self):
        """Return the synthesizer attribute name that holds the provider voice."""
        return voice_attr_of[type(self.synthesizer)]

    def get_agent(self, chat_id):
        """Return the agent for ``chat_id`` that matches the current voice.

        The first time a chat is seen, one agent per configured voice is
        created, each with the system prompt formatted with that voice's
        display name.
        """
        if chat_id not in self.agents:
            self.agents[chat_id] = {
                voice_name: ChatGPTAgent(
                    system_prompt=self.system_prompt.format(voice_name))
                for voice_name in self.voice_name_to_id
            }
        return self.agents[chat_id][self.get_current_voice()]

    def get_response(self, chat_id, voice_input: AudioSegment) -> Tuple[str, AudioSegment]:
        """Transcribe ``voice_input`` and produce the reply text and audio.

        Returns:
            ``(agent_response, synth_response)``: the agent's reply and the
            result of synthesizing that reply.
        """
        # NOTE(review): these three calls are synchronous and are invoked from
        # an async handler; presumably they perform network I/O and block the
        # event loop while running - confirm before scaling to many chats.
        transcript = self.transcriber.transcribe(voice_input)
        agent_response = self.get_agent(chat_id).respond(transcript)
        synth_response = self.synthesizer.synthesize(agent_response)
        return agent_response, synth_response

    def get_current_voice(self):
        """Return the display name of the synthesizer's current voice.

        Raises:
            KeyError: If the synthesizer's current voice id is not one of the
                ids in ``voice_name_to_id``.
        """
        current_voice_id = getattr(self.synthesizer, self._voice_attr_name())
        return self.voice_id_to_name[current_voice_id]

    async def handle_telegram_start(self, update: Update,
                                    context: ContextTypes.DEFAULT_TYPE):
        """Reply to /start with a short introduction."""
        # The original text had a stray '"' after "send one back!"; removed.
        start_text = (
            "\nI'm a voice chatbot, send a voice message to me and I'll send one back!"
            " Use /help to see available commands.\n"
        )
        await context.bot.send_message(chat_id=update.effective_chat.id,
                                       text=start_text)

    async def handle_telegram_voice(self, update: Update,
                                    context: ContextTypes.DEFAULT_TYPE):
        """Answer a voice message with the reply text and a voice message."""
        chat_id = update.effective_chat.id
        user_telegram_voice = await context.bot.get_file(update.message.voice.file_id)
        # Named voice_bytes rather than `bytes` to avoid shadowing the builtin.
        voice_bytes = await user_telegram_voice.download_as_bytearray()
        user_voice = AudioSegment.from_file(
            io.BytesIO(voice_bytes), format='ogg', codec='libopus')
        agent_response, synth_response = self.get_response(chat_id, user_voice)
        # Encode the reply into an in-memory OGG/Opus buffer for send_voice.
        out_voice = io.BytesIO()
        synth_response.export(out_f=out_voice, format='ogg', codec='libopus')
        await context.bot.send_message(chat_id=chat_id, text=agent_response)
        await context.bot.send_voice(chat_id=chat_id, voice=out_voice)

    async def handle_telegram_select_voice(self, update: Update,
                                           context: ContextTypes.DEFAULT_TYPE):
        """Handle /voice <voice_name>: switch the synthesizer's voice.

        The name is matched case-insensitively against the configured display
        names; only the first argument is considered.
        """
        chat_id = update.effective_chat.id
        if not context.args:
            await context.bot.send_message(
                chat_id=chat_id,
                text='You must include a voice name. Use /list to list available voices')
            return
        selected_voice = context.args[0].lower()
        for voice_name, voice_id in self.voice_name_to_id.items():
            if voice_name.lower() == selected_voice:
                setattr(self.synthesizer, self._voice_attr_name(), voice_id)
                await context.bot.send_message(chat_id=chat_id,
                                               text='Voice changed successfully!')
                return
        await context.bot.send_message(
            chat_id=chat_id,
            text='Sorry, I do not recognize that voice. Use /list to list available voices.')

    async def handle_telegram_list_voices(self, update: Update,
                                          context: ContextTypes.DEFAULT_TYPE):
        """Handle /list: send the configured display names, one per line."""
        voices = "\n".join(self.voice_name_to_id)
        await context.bot.send_message(chat_id=update.effective_chat.id,
                                       text=f'Available voices:\n{voices}')

    async def handle_telegram_who(self, update: Update,
                                  context: ContextTypes.DEFAULT_TYPE):
        """Handle /who: report the display name of the current voice."""
        current_voice_name = self.get_current_voice()
        await context.bot.send_message(
            chat_id=update.effective_chat.id,
            text=f"I am currently '{current_voice_name}'.")

    async def handle_telegram_help(self, update: Update,
                                   context: ContextTypes.DEFAULT_TYPE):
        """Handle /help: send the list of supported interactions."""
        help_text = (
            "\nI'm a voice bot, here to talk with you! Here's what you can do:\n"
            "- Send me a voice message and I'll respond with a voice message.\n"
            "- Use /voice <voice_name> to change the voice I use to respond.\n"
            "- Use /list to see a list of available voices.\n"
            "- Use /who to see what voice I currently am.\n"
            "- Use /help to see this help message again.\n"
        )
        await context.bot.send_message(chat_id=update.effective_chat.id,
                                       text=help_text)

    async def handle_telegram_unknown_cmd(self, update: Update,
                                          context: ContextTypes.DEFAULT_TYPE):
        """Reply to a command that no other handler recognised."""
        await context.bot.send_message(
            chat_id=update.effective_chat.id,
            text="\nSorry, I didn't understand that command. Use /help to see available commands")

    async def handle_telegram_unknown(self, update: Update,
                                      context: ContextTypes.DEFAULT_TYPE):
        """Reply to a non-command message that is not a voice message."""
        await context.bot.send_message(
            chat_id=update.effective_chat.id,
            text="\nSorry, I only respond to voice messages or commands. Use /help for more information.")
if __name__ == '__main__':
    # Script entry point: build the responder, register the Telegram handlers
    # and start long-polling.
    import sys  # local import; only needed to locate this interpreter's pip

    transcriber = WhisperTranscriber()
    synthesizer = SYNTH
    if isinstance(synthesizer, ElevenLabsSynthesizer):
        # Best-effort install of the `elevenlabs` package. Running pip through
        # sys.executable targets the interpreter that is running this script,
        # whereas a bare `pip` resolves to whatever is first on PATH.
        # The return code is deliberately not checked, as before.
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'elevenlabs'])

    voco = VocodeBotResponder(transcriber, SYSTEM_PROMPT, synthesizer, VOICE_NAME_TO_ID)
    application = ApplicationBuilder().token(os.environ['TELEGRAM_BOT_KEY']).build()

    # Registration order is preserved from the original: the specific handlers
    # come first and the two catch-all handlers last.
    handlers = [
        CommandHandler('start', voco.handle_telegram_start),
        MessageHandler(filters.VOICE & (~filters.COMMAND), voco.handle_telegram_voice),
        CommandHandler('voice', voco.handle_telegram_select_voice),
        CommandHandler('list', voco.handle_telegram_list_voices),
        CommandHandler('who', voco.handle_telegram_who),
        CommandHandler('help', voco.handle_telegram_help),
        MessageHandler(filters.COMMAND, voco.handle_telegram_unknown_cmd),
        MessageHandler(~filters.COMMAND, voco.handle_telegram_unknown),
    ]
    for handler in handlers:
        application.add_handler(handler)

    application.run_polling()