Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update examples to the latest API & export AutoSubscribe #534

Merged
merged 9 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/gentle-numbers-happen.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-agents": patch
---

update examples to the latest API & export AutoSubscribe
49 changes: 42 additions & 7 deletions .github/update_versions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import pathlib
import re


def update_version(project_root: pathlib.Path, py_version_path: pathlib.Path):
def update_py_version(project_root: pathlib.Path, py_version_path: pathlib.Path) -> str:
with open(project_root / "package.json") as f:
package = json.load(f)
version = package["version"]
Expand All @@ -17,17 +18,51 @@ def update_version(project_root: pathlib.Path, py_version_path: pathlib.Path):
with open(py_version_path, "w") as f:
f.writelines(lines)

return version


def update_requirements_txt(example_dir: pathlib.Path, last_versions: dict[str, str]):
# recursively find all requirements.txt files
requirements_files = example_dir.rglob("requirements.txt")

for req_file in requirements_files:
with open(req_file, "r") as f:
lines = f.readlines()

for i, line in enumerate(lines):
parts = re.split(r"(>=|==|<=|~=|!=|>)", line.strip())
if len(parts) <= 1:
continue

pkg_name = parts[0].strip()
if pkg_name in last_versions:
lines[i] = f"{pkg_name}>={last_versions[pkg_name]}\n"

with open(req_file, "w") as f:
f.writelines(lines)


if __name__ == "__main__":
livekit_agents = pathlib.Path.cwd() / "livekit-agents"
update_version(livekit_agents, livekit_agents / "livekit" / "agents" / "version.py")
package_versions = {}

agents_root = pathlib.Path.cwd() / "livekit-agents"
plugins_root = pathlib.Path.cwd() / "livekit-plugins"
plugins = plugins_root.iterdir()
for plugin in plugins:
examples_root = pathlib.Path.cwd() / "examples"

agent_version = update_py_version(
agents_root, agents_root / "livekit" / "agents" / "version.py"
)
package_versions["livekit-agents"] = agent_version

for plugin in plugins_root.iterdir():
if not plugin.is_dir():
continue

plugin_name = plugin.name.split("-")[-1]
py_version_path = plugin / "livekit" / "plugins" / plugin_name / "version.py"
update_version(plugin, py_version_path)
version = update_py_version(
plugin, plugin / "livekit" / "plugins" / plugin_name / "version.py"
)
package_versions[plugin.name] = version

# update requirements.txt of our examples
update_requirements_txt(examples_root, package_versions)
13 changes: 8 additions & 5 deletions examples/minimal_worker.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import logging

from livekit.agents import JobContext, WorkerOptions, cli
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli

logger = logging.getLogger("my-worker")
logger.setLevel(logging.INFO)


async def entrypoint(ctx: JobContext):
logging.info("starting entrypoint")
logger.info("starting entrypoint")

# Connect to the room
await ctx.connect()
await ctx.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_ALL)

# Add your agent logic here!
logger.info("connected to the room")
# add your agent logic here!


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion examples/simple-color/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async def _draw_color():
frame = rtc.VideoFrame(WIDTH, HEIGHT, rtc.VideoBufferType.RGBA, argb_frame)
source.capture_frame(frame)

asyncio.create_task(_draw_color())
await _draw_color()


if __name__ == "__main__":
Expand Down
14 changes: 9 additions & 5 deletions examples/speech-to-text/deepgram_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from livekit import rtc
from livekit.agents import (
AutoSubscribe,
JobContext,
WorkerOptions,
cli,
Expand All @@ -11,6 +12,9 @@
)
from livekit.plugins import deepgram

logger = logging.getLogger("deepgram-stt-demo")
logger.setLevel(logging.INFO)


async def _forward_transcription(
stt_stream: stt.SpeechStream, stt_forwarder: transcription.STTSegmentsForwarder
Expand All @@ -25,15 +29,15 @@ async def _forward_transcription(
print(" -> ", ev.alternatives[0].text)


async def entrypoint(job: JobContext):
logging.info("starting speech-to-text example")
async def entrypoint(ctx: JobContext):
logger.info("starting speech-to-text example")
stt = deepgram.STT()
tasks = []

async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track):
audio_stream = rtc.AudioStream(track)
stt_forwarder = transcription.STTSegmentsForwarder(
room=job.room, participant=participant, track=track
room=ctx.room, participant=participant, track=track
)
stt_stream = stt.stream()
stt_task = asyncio.create_task(
Expand All @@ -44,9 +48,9 @@ async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track)
async for ev in audio_stream:
stt_stream.push_frame(ev.frame)

await job.connect(auto_subscribe="audio_only")
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

@job.room.on("track_subscribed")
@ctx.room.on("track_subscribed")
def on_track_subscribed(
track: rtc.Track,
publication: rtc.TrackPublication,
Expand Down
9 changes: 6 additions & 3 deletions examples/text-to-speech/elevenlabs_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from livekit.agents import JobContext, WorkerOptions, cli
from livekit.plugins import elevenlabs

logger = logging.getLogger("elevenlabs-tts-demo")
logger.setLevel(logging.INFO)


def _text_to_chunks(text: str) -> list[str]:
"""Split the text into chunks of 2, 3, and 4 words"""
Expand Down Expand Up @@ -51,20 +54,20 @@ async def entrypoint(job: JobContext):
await job.room.local_participant.publish_track(track, options)

await asyncio.sleep(1)
logging.info('Saying "Bonjour, comment allez-vous?"')
logger.info('Saying "Bonjour, comment allez-vous?"')
async for output in tts_11labs.synthesize("Bonjour, comment allez-vous?"):
await source.capture_frame(output.frame)

await asyncio.sleep(1)
logging.info('Saying "Au revoir."')
logger.info('Saying "Au revoir."')
async for output in tts_11labs.synthesize("Au revoir."):
await source.capture_frame(output.frame)

await asyncio.sleep(1)
streamed_text = (
"Bonjour, ceci est un autre example avec la méthode utilisant un websocket."
)
logging.info('Streaming text "%s"', streamed_text)
logger.info('Streaming text "%s"', streamed_text)
stream = tts_11labs.stream()
for chunk in _text_to_chunks(
streamed_text
Expand Down
13 changes: 8 additions & 5 deletions examples/text-to-speech/openai_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import logging

from livekit import rtc
from livekit.agents import JobContext, WorkerOptions, cli
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.plugins import openai

logger = logging.getLogger("openai-tts-demo")
logger.setLevel(logging.INFO)


async def entrypoint(job: JobContext):
logging.info("starting tts example agent")
logger.info("starting tts example agent")

tts = openai.TTS(model="tts-1", voice="nova")

Expand All @@ -16,16 +19,16 @@ async def entrypoint(job: JobContext):
options = rtc.TrackPublishOptions()
options.source = rtc.TrackSource.SOURCE_MICROPHONE

await job.connect()
await job.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_NONE)
await job.room.local_participant.publish_track(track, options)

await asyncio.sleep(1)
logging.info('Saying "Hello!"')
logger.info('Saying "Hello!"')
async for output in tts.synthesize("Hello!"):
await source.capture_frame(output.frame)

await asyncio.sleep(1)
logging.info('Saying "Goodbye."')
logger.info('Saying "Goodbye."')
async for output in tts.synthesize("Goodbye."):
await source.capture_frame(output.frame)

Expand Down
Loading
Loading