livekit · keepingitneil · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
diff --git a/.changeset/gentle-numbers-happen.md b/.changeset/gentle-numbers-happen.md
@@ -0,0 +1,5 @@
+---
+"livekit-agents": patch
+---
+
+update examples to the latest API & export AutoSubscribe
diff --git a/.github/update_versions.py b/.github/update_versions.py
@@ -1,8 +1,9 @@
 import json
 import pathlib
+import re
 
 
-def update_version(project_root: pathlib.Path, py_version_path: pathlib.Path):
+def update_py_version(project_root: pathlib.Path, py_version_path: pathlib.Path) -> str:
     with open(project_root / "package.json") as f:
         package = json.load(f)
         version = package["version"]
@@ -17,17 +18,51 @@ def update_version(project_root: pathlib.Path, py_version_path: pathlib.Path):
     with open(py_version_path, "w") as f:
         f.writelines(lines)
 
+    return version
+
+
+def update_requirements_txt(example_dir: pathlib.Path, last_versions: dict[str, str]):
+    # recursively find all requirements.txt files
+    requirements_files = example_dir.rglob("requirements.txt")
+
+    for req_file in requirements_files:
+        with open(req_file, "r") as f:
+            lines = f.readlines()
+
+        for i, line in enumerate(lines):
+            parts = re.split(r"(>=|==|<=|~=|!=|>)", line.strip())
+            if len(parts) <= 1:
+                continue
+
+            pkg_name = parts[0].strip()
+            if pkg_name in last_versions:
+                lines[i] = f"{pkg_name}>={last_versions[pkg_name]}\n"
+
+        with open(req_file, "w") as f:
+            f.writelines(lines)
+
 
 if __name__ == "__main__":
-    livekit_agents = pathlib.Path.cwd() / "livekit-agents"
-    update_version(livekit_agents, livekit_agents / "livekit" / "agents" / "version.py")
+    package_versions = {}
 
+    agents_root = pathlib.Path.cwd() / "livekit-agents"
     plugins_root = pathlib.Path.cwd() / "livekit-plugins"
-    plugins = plugins_root.iterdir()
-    for plugin in plugins:
+    examples_root = pathlib.Path.cwd() / "examples"
+
+    agent_version = update_py_version(
+        agents_root, agents_root / "livekit" / "agents" / "version.py"
+    )
+    package_versions["livekit-agents"] = agent_version
+
+    for plugin in plugins_root.iterdir():
         if not plugin.is_dir():
             continue
 
         plugin_name = plugin.name.split("-")[-1]
-        py_version_path = plugin / "livekit" / "plugins" / plugin_name / "version.py"
-        update_version(plugin, py_version_path)
+        version = update_py_version(
+            plugin, plugin / "livekit" / "plugins" / plugin_name / "version.py"
+        )
+        package_versions[plugin.name] = version
+
+    # update requirements.txt of our examples
+    update_requirements_txt(examples_root, package_versions)
diff --git a/examples/minimal_worker.py b/examples/minimal_worker.py
@@ -1,15 +1,18 @@
 import logging
 
-from livekit.agents import JobContext, WorkerOptions, cli
+from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
+
+logger = logging.getLogger("my-worker")
+logger.setLevel(logging.INFO)
 
 
 async def entrypoint(ctx: JobContext):
-    logging.info("starting entrypoint")
+    logger.info("starting entrypoint")
 
-    # Connect to the room
-    await ctx.connect()
+    await ctx.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_ALL)
 
-    # Add your agent logic here!
+    logger.info("connected to the room")
+    # add your agent logic here!
 
 
 if __name__ == "__main__":

diff --git a/examples/simple-color/agent.py b/examples/simple-color/agent.py
@@ -30,7 +30,7 @@ async def _draw_color():
             frame = rtc.VideoFrame(WIDTH, HEIGHT, rtc.VideoBufferType.RGBA, argb_frame)
             source.capture_frame(frame)
 
-    asyncio.create_task(_draw_color())
+    await _draw_color()
 
 
 if __name__ == "__main__":

diff --git a/examples/speech-to-text/deepgram_stt.py b/examples/speech-to-text/deepgram_stt.py
@@ -3,6 +3,7 @@
 
 from livekit import rtc
 from livekit.agents import (
+    AutoSubscribe,
     JobContext,
     WorkerOptions,
     cli,
@@ -11,6 +12,9 @@
 )
 from livekit.plugins import deepgram
 
+logger = logging.getLogger("deepgram-stt-demo")
+logger.setLevel(logging.INFO)
+
 
 async def _forward_transcription(
     stt_stream: stt.SpeechStream, stt_forwarder: transcription.STTSegmentsForwarder
@@ -25,15 +29,15 @@ async def _forward_transcription(
             print(" -> ", ev.alternatives[0].text)
 
 
-async def entrypoint(job: JobContext):
-    logging.info("starting speech-to-text example")
+async def entrypoint(ctx: JobContext):
+    logger.info("starting speech-to-text example")
     stt = deepgram.STT()
     tasks = []
 
     async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track):
         audio_stream = rtc.AudioStream(track)
         stt_forwarder = transcription.STTSegmentsForwarder(
-            room=job.room, participant=participant, track=track
+            room=ctx.room, participant=participant, track=track
         )
         stt_stream = stt.stream()
         stt_task = asyncio.create_task(
@@ -44,9 +48,9 @@ async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track)
         async for ev in audio_stream:
             stt_stream.push_frame(ev.frame)
 
-    await job.connect(auto_subscribe="audio_only")
+    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
 
-    @job.room.on("track_subscribed")
+    @ctx.room.on("track_subscribed")
     def on_track_subscribed(
         track: rtc.Track,
         publication: rtc.TrackPublication,

diff --git a/examples/text-to-speech/elevenlabs_tts.py b/examples/text-to-speech/elevenlabs_tts.py
@@ -6,6 +6,9 @@
 from livekit.agents import JobContext, WorkerOptions, cli
 from livekit.plugins import elevenlabs
 
+logger = logging.getLogger("elevenlabs-tts-demo")
+logger.setLevel(logging.INFO)
+
 
 def _text_to_chunks(text: str) -> list[str]:
     """Split the text into chunks of 2, 3, and 4 words"""
@@ -51,20 +54,20 @@ async def entrypoint(job: JobContext):
     await job.room.local_participant.publish_track(track, options)
 
     await asyncio.sleep(1)
-    logging.info('Saying "Bonjour, comment allez-vous?"')
+    logger.info('Saying "Bonjour, comment allez-vous?"')
     async for output in tts_11labs.synthesize("Bonjour, comment allez-vous?"):
         await source.capture_frame(output.frame)
 
     await asyncio.sleep(1)
-    logging.info('Saying "Au revoir."')
+    logger.info('Saying "Au revoir."')
     async for output in tts_11labs.synthesize("Au revoir."):
         await source.capture_frame(output.frame)
 
     await asyncio.sleep(1)
     streamed_text = (
         "Bonjour, ceci est un autre example avec la méthode utilisant un websocket."
     )
-    logging.info('Streaming text "%s"', streamed_text)
+    logger.info('Streaming text "%s"', streamed_text)
     stream = tts_11labs.stream()
     for chunk in _text_to_chunks(
         streamed_text

diff --git a/examples/text-to-speech/openai_tts.py b/examples/text-to-speech/openai_tts.py
@@ -2,12 +2,15 @@
 import logging
 
 from livekit import rtc
-from livekit.agents import JobContext, WorkerOptions, cli
+from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
 from livekit.plugins import openai
 
+logger = logging.getLogger("openai-tts-demo")
+logger.setLevel(logging.INFO)
+
 
 async def entrypoint(job: JobContext):
-    logging.info("starting tts example agent")
+    logger.info("starting tts example agent")
 
     tts = openai.TTS(model="tts-1", voice="nova")
 
@@ -16,16 +19,16 @@ async def entrypoint(job: JobContext):
     options = rtc.TrackPublishOptions()
     options.source = rtc.TrackSource.SOURCE_MICROPHONE
 
-    await job.connect()
+    await job.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_NONE)
     await job.room.local_participant.publish_track(track, options)
 
     await asyncio.sleep(1)
-    logging.info('Saying "Hello!"')
+    logger.info('Saying "Hello!"')
     async for output in tts.synthesize("Hello!"):
         await source.capture_frame(output.frame)
 
     await asyncio.sleep(1)
-    logging.info('Saying "Goodbye."')
+    logger.info('Saying "Goodbye."')
     async for output in tts.synthesize("Goodbye."):
         await source.capture_frame(output.frame)