livekit · theomonnom · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
diff --git a/.changeset/fluffy-terms-arrive.md b/.changeset/fluffy-terms-arrive.md
@@ -0,0 +1,5 @@
+---
+"livekit-agents": patch
+---
+
+voice assistant: automatically wrap the provided STT in an stt.StreamAdapter (using the assistant's VAD) when the STT doesn't support streaming
diff --git a/examples/voice-assistant/minimal_assistant.py b/examples/voice-assistant/minimal_assistant.py
@@ -2,7 +2,7 @@
 
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm
 from livekit.agents.voice_assistant import VoiceAssistant
-from livekit.plugins import deepgram, elevenlabs, openai, silero
+from livekit.plugins import deepgram, openai, silero
 
 
 async def entrypoint(ctx: JobContext):
@@ -20,7 +20,7 @@ async def entrypoint(ctx: JobContext):
         vad=silero.VAD.load(),
         stt=deepgram.STT(),
         llm=openai.LLM(),
-        tts=elevenlabs.TTS(),
+        tts=openai.TTS(),
         chat_ctx=initial_ctx,
     )
     assistant.start(ctx.room)

diff --git a/examples/voice-assistant/requirements.txt b/examples/voice-assistant/requirements.txt
@@ -1,6 +1,5 @@
 livekit-agents~=0.7.2
 livekit-plugins-openai~=0.6
 livekit-plugins-deepgram~=0.5
-livekit-plugins-elevenlabs~=0.6
 livekit-plugins-silero~=0.5
 
diff --git a/livekit-agents/livekit/agents/stt/stt.py b/livekit-agents/livekit/agents/stt/stt.py
@@ -62,8 +62,7 @@ async def recognize(
 
     def stream(self, *, language: str | None = None) -> "SpeechStream":
         raise NotImplementedError(
-            "streaming is not supported by this STT, please use \
-            a different STT or use a StreamAdapter"
+            "streaming is not supported by this STT, please use a different STT or use a StreamAdapter"
         )
 
     async def aclose(self) -> None:

diff --git a/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py b/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py
@@ -144,13 +144,22 @@ def __init__(
 
         # wrap with StreamAdapter automatically when streaming is not supported on a specific TTS
         # to override StreamAdapter options, create the adapter manually
+
         if not tts.capabilities.streaming:
             from .. import tts as text_to_speech
 
             tts = text_to_speech.StreamAdapter(
                 tts=tts, sentence_tokenizer=tokenize.basic.SentenceTokenizer()
             )
 
+        if not stt.capabilities.streaming:
+            from .. import stt as speech_to_text
+
+            stt = speech_to_text.StreamAdapter(
+                stt=stt,
+                vad=vad,
+            )
+
         self._stt, self._vad, self._llm, self._tts = stt, vad, llm, tts
         self._chat_ctx = chat_ctx or ChatContext()
         self._fnc_ctx = fnc_ctx