diff --git a/examples/voice-assistant/requirements.txt b/examples/voice-assistant/requirements.txt
index a584f90af..d136eac1c 100644
--- a/examples/voice-assistant/requirements.txt
+++ b/examples/voice-assistant/requirements.txt
@@ -2,4 +2,5 @@ livekit-agents>=0.8.8
 livekit-plugins-openai>=0.8.1
 livekit-plugins-deepgram>=0.6.5
 livekit-plugins-silero>=0.6.4
-python-dotenv~=1.0
\ No newline at end of file
+python-dotenv~=1.0
+aiofile~=3.8.8
diff --git a/examples/voice-assistant/save_chatctx.py b/examples/voice-assistant/save_chatctx.py
new file mode 100644
index 000000000..d6b1b6ac6
--- /dev/null
+++ b/examples/voice-assistant/save_chatctx.py
@@ -0,0 +1,84 @@
+import asyncio
+from datetime import datetime
+
+from aiofile import async_open as open
+from dotenv import load_dotenv
+from livekit import rtc
+from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm
+from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.plugins import deepgram, openai, silero
+
+load_dotenv()
+
+
+async def entrypoint(ctx: JobContext):
+    initial_ctx = llm.ChatContext().append(
+        role="system",
+        text=(
+            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
+            "You should use short and concise responses, and avoid usage of unpronounceable punctuation."
+        ),
+    )
+
+    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
+
+    assistant = VoiceAssistant(
+        vad=silero.VAD.load(),
+        stt=deepgram.STT(),
+        llm=openai.LLM(),
+        tts=openai.TTS(),
+        chat_ctx=initial_ctx,
+    )
+    assistant.start(ctx.room)
+
+    # listen to incoming chat messages, only required if you'd like the agent to
+    # answer incoming messages from Chat
+    chat = rtc.ChatManager(ctx.room)
+
+    async def answer_from_text(txt: str):
+        chat_ctx = assistant.chat_ctx.copy()
+        chat_ctx.append(role="user", text=txt)
+        stream = assistant.llm.chat(chat_ctx=chat_ctx)
+        await assistant.say(stream)
+
+    @chat.on("message_received")
+    def on_chat_received(msg: rtc.ChatMessage):
+        if msg.message:
+            asyncio.create_task(answer_from_text(msg.message))
+
+    log_queue = asyncio.Queue()
+
+    @assistant.on("user_speech_committed")
+    def on_user_speech_committed(msg: llm.ChatMessage):
+        # convert string lists to strings, drop images
+        if isinstance(msg.content, list):
+            msg.content = "\n".join(
+                "[image]" if isinstance(x, llm.ChatImage) else x for x in msg.content
+            )
+        log_queue.put_nowait(f"[{datetime.now()}] USER:\n{msg.content}\n\n")
+
+    @assistant.on("agent_speech_committed")
+    def on_agent_speech_committed(msg: llm.ChatMessage):
+        log_queue.put_nowait(f"[{datetime.now()}] AGENT:\n{msg.content}\n\n")
+
+    async def write_transcription():
+        async with open("transcriptions.log", "w") as f:
+            while True:
+                msg = await log_queue.get()
+                if msg is None:
+                    break
+                await f.write(msg)
+
+    write_task = asyncio.create_task(write_transcription())
+
+    async def finish_queue():
+        log_queue.put_nowait(None)
+        await write_task
+
+    ctx.add_shutdown_callback(finish_queue)
+
+    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)
+
+
+if __name__ == "__main__":
+    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
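The transcription writing in `write_transcription` / `finish_queue` is a plain asyncio producer-consumer with a `None` sentinel to stop the writer on shutdown. Below is a minimal standalone sketch of that pattern with `aiofile`, outside of any LiveKit machinery; the filename and the queued messages are illustrative only, not part of this PR.

```python
import asyncio

from aiofile import async_open


async def main():
    log_queue: asyncio.Queue = asyncio.Queue()

    async def writer():
        # consume queued strings and append them to the log file
        async with async_open("transcriptions.log", "w") as f:
            while True:
                item = await log_queue.get()
                if item is None:  # sentinel: stop writing and close the file
                    break
                await f.write(item)

    writer_task = asyncio.create_task(writer())

    # producers push formatted lines without blocking
    log_queue.put_nowait("USER: hello\n")
    log_queue.put_nowait("AGENT: hi there\n")

    # signal shutdown and wait for the writer to drain the queue
    log_queue.put_nowait(None)
    await writer_task


if __name__ == "__main__":
    asyncio.run(main())
```

In the example itself, the sentinel is pushed from `finish_queue`, which is registered via `ctx.add_shutdown_callback` so the log is flushed before the job exits.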