# app.py

from io import BytesIO
import os
import sys

import chainlit as cl
from chainlit.element import ElementBased
from dotenv import load_dotenv, find_dotenv

# Load environment variables before any client libraries read them.
load_dotenv(find_dotenv())

import httpx
from openai import AsyncOpenAI
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
# from llama_index.llms.bedrock import Bedrock
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

sys.path.append("./src")
from utils import initialize_agent

# Patch the OpenAI client so Chainlit traces its calls (note: this must
# actually be called, not just referenced).
cl.instrument_openai()

client = AsyncOpenAI()

ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
    raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")

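# For reference, a typical .env for this app might look like the sketch
# below (values are placeholders; OPENAI_API_KEY is read implicitly by the
# OpenAI clients):
#
#   OPENAI_API_KEY=sk-...
#   ELEVENLABS_API_KEY=...
#   ELEVENLABS_VOICE_ID=...
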
@cl.step(type="tool")
async def speech_to_text(audio_file):
    # Transcribe the recorded audio with OpenAI Whisper.
    response = await client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        language="en",
    )
    return response.text

@cl.step(type="tool")
async def text_to_speech(text: str, mime_type: str):
    # Synthesize speech with the ElevenLabs API; returns (filename, bytes).
    CHUNK_SIZE = 1024
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    headers = {
        "Accept": mime_type,
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }
    data = {
        "text": text,
        "model_id": "eleven_turbo_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }
    # Scope the httpx client locally so it doesn't shadow the module-level
    # OpenAI client.
    async with httpx.AsyncClient(timeout=25.0) as http_client:
        response = await http_client.post(url, json=data, headers=headers)
        response.raise_for_status()  # ensures we are alerted on bad responses
        buffer = BytesIO()
        buffer.name = f"output_audio.{mime_type.split('/')[1]}"
        async for chunk in response.aiter_bytes(chunk_size=CHUNK_SIZE):
            if chunk:
                buffer.write(chunk)
        buffer.seek(0)
        return buffer.name, buffer.read()

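# A minimal streaming variant (sketch): ElevenLabs also exposes a
# ".../stream" endpoint, and this assumes it accepts the same payload as the
# request above. Yielding bytes as they arrive would cut time-to-first-audio,
# at the cost of needing a consumer that can play partial audio.
async def text_to_speech_stream(text: str, mime_type: str):
    url = (
        f"https://api.elevenlabs.io/v1/text-to-speech/"
        f"{ELEVENLABS_VOICE_ID}/stream"
    )
    headers = {
        "Accept": mime_type,
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }
    data = {"text": text, "model_id": "eleven_turbo_v2"}
    async with httpx.AsyncClient(timeout=25.0) as http_client:
        # httpx's stream() context manager exposes the body incrementally.
        async with http_client.stream(
            "POST", url, json=data, headers=headers
        ) as response:
            response.raise_for_status()
            async for chunk in response.aiter_bytes():
                if chunk:
                    yield chunk
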
@cl.on_chat_start
async def on_chat_start():
    await cl.Avatar(
        name="Dylan",
        path="./images/dylan.png",
    ).send()
    # Wire LlamaIndex tracing into both the console and the Chainlit UI.
    llama_debug = LlamaDebugHandler(print_trace_on_end=True)
    Settings.callback_manager = CallbackManager(
        [llama_debug, cl.LlamaIndexCallbackHandler()]
    )
    Settings.llm = OpenAI(model="gpt-4-turbo", temperature=0.1)
    # Settings.llm = Bedrock(
    #     model="anthropic.claude-3-opus-20240229-v1:0",
    #     aws_access_key_id=os.environ["AWS_ACCESS_KEY"],
    #     aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    #     aws_region_name=os.environ["AWS_DEFAULT_REGION"],
    # )
    agent = initialize_agent(llm=Settings.llm)
    cl.user_session.set("agent", agent)
    await cl.Message(
        content="Hi! How can I help you? Simply type your question or press `P` to speak.",
        author="Dylan",
    ).send()

@cl.on_message
async def on_message(message: cl.Message):
    agent = cl.user_session.get("agent")
    response_message = await cl.Message(content="", author="Dylan").send()
    cl.user_session.set("response_message", response_message)
    # stream_chat is synchronous, so run it off the event loop.
    response = await cl.make_async(agent.stream_chat)(message.content)
    response_message = cl.user_session.get("response_message")
    for chunk in response.response_gen:
        await response_message.stream_token(chunk)
    await response_message.update()
    if cl.context.session.client_type == "copilot":
        fn = cl.CopilotFunction(
            name="test",
            args={"message": message.content, "response": str(response)},
        )
        await fn.acall()

@cl.on_audio_chunk
async def on_audio_chunk(chunk: cl.AudioChunk):
    if chunk.isStart:
        # First chunk of a new recording: start a fresh buffer.
        buffer = BytesIO()
        buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
        cl.user_session.set("audio_buffer", buffer)
        cl.user_session.set("audio_mime_type", chunk.mimeType)
    # Append every chunk (including the first) to the session buffer.
    cl.user_session.get("audio_buffer").write(chunk.data)

@cl.on_audio_end
async def on_audio_end(elements: list[ElementBased]):
    # Collect the full recording accumulated by on_audio_chunk.
    audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
    audio_buffer.seek(0)
    audio_file = audio_buffer.read()
    audio_mime_type: str = cl.user_session.get("audio_mime_type")

    input_audio_el = cl.Audio(
        mime=audio_mime_type, content=audio_file, name=audio_buffer.name
    )
    # Whisper accepts a (filename, bytes, mime_type) tuple as the file argument.
    whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
    transcription = await speech_to_text(whisper_input)
    await cl.Message(
        author="You",
        type="user_message",
        content=transcription,
        elements=[input_audio_el, *elements],
    ).send()

    agent = cl.user_session.get("agent")
    response_message = await cl.Message(content="", author="Dylan").send()
    cl.user_session.set("response_message", response_message)
    response = await cl.make_async(agent.stream_chat)(transcription)
    response_message = cl.user_session.get("response_message")
    for chunk in response.response_gen:
        await response_message.stream_token(chunk)

    # Voice reply: synthesize the streamed text and attach it as playable audio.
    output_name, output_audio = await text_to_speech(
        response_message.content, audio_mime_type
    )
    output_audio_el = cl.Audio(
        name=output_name,
        auto_play=True,
        mime=audio_mime_type,
        content=output_audio,
    )
    response_message.elements = [output_audio_el]
    await response_message.update()

    if cl.context.session.client_type == "copilot":
        fn = cl.CopilotFunction(
            name="test",
            args={"message": transcription, "response": str(response)},
        )
        await fn.acall()
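
# To launch the app locally (assuming the Chainlit CLI is installed):
#     chainlit run app.py -w
# The -w flag enables auto-reload on file changes during development.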