Merge pull request #309 from benxu3/livekit-realtime
add realtime livekit multimodal worker
This commit is contained in:
commit
207ec088b4
|
@ -19,6 +19,7 @@ import time
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import signal
|
import signal
|
||||||
from source.server.livekit.worker import main as worker_main
|
from source.server.livekit.worker import main as worker_main
|
||||||
|
from source.server.livekit.multimodal import main as multimodal_main
|
||||||
import warnings
|
import warnings
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
@ -71,6 +72,11 @@ def run(
|
||||||
"--debug",
|
"--debug",
|
||||||
help="Print latency measurements and save microphone recordings locally for manual playback",
|
help="Print latency measurements and save microphone recordings locally for manual playback",
|
||||||
),
|
),
|
||||||
|
multimodal: bool = typer.Option(
|
||||||
|
False,
|
||||||
|
"--multimodal",
|
||||||
|
help="Run the multimodal agent",
|
||||||
|
),
|
||||||
):
|
):
|
||||||
|
|
||||||
threads = []
|
threads = []
|
||||||
|
@ -274,7 +280,10 @@ def run(
|
||||||
|
|
||||||
for attempt in range(30):
|
for attempt in range(30):
|
||||||
try:
|
try:
|
||||||
worker_main(local_livekit_url)
|
if multimodal:
|
||||||
|
multimodal_main(local_livekit_url)
|
||||||
|
else:
|
||||||
|
worker_main(local_livekit_url)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("Exiting.")
|
print("Exiting.")
|
||||||
raise
|
raise
|
||||||
|
|
|
@ -12,12 +12,12 @@ readme = "../README.md"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.10,<3.12"
|
python = ">=3.10,<3.12"
|
||||||
livekit = "^0.12.1"
|
livekit = "^0.17.2"
|
||||||
livekit-agents = "^0.8.6"
|
livekit-agents = "^0.10.0"
|
||||||
livekit-plugins-deepgram = "^0.6.5"
|
livekit-plugins-deepgram = "^0.6.7"
|
||||||
livekit-plugins-openai = "^0.8.1"
|
livekit-plugins-openai = "^0.10.1"
|
||||||
livekit-plugins-silero = "^0.6.4"
|
livekit-plugins-silero = "^0.7.1"
|
||||||
livekit-plugins-elevenlabs = "^0.7.3"
|
livekit-plugins-elevenlabs = "^0.7.5"
|
||||||
segno = "^1.6.1"
|
segno = "^1.6.1"
|
||||||
open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package
|
open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package
|
||||||
ngrok = "^1.4.0"
|
ngrok = "^1.4.0"
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from livekit.agents import (
|
||||||
|
AutoSubscribe,
|
||||||
|
JobContext,
|
||||||
|
WorkerOptions,
|
||||||
|
cli,
|
||||||
|
llm,
|
||||||
|
)
|
||||||
|
from livekit.agents.multimodal import MultimodalAgent
|
||||||
|
from livekit.plugins import openai
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
async def entrypoint(ctx: JobContext):
    """Connect to the job's room and start an OpenAI realtime multimodal agent.

    Steps, in order:
      1. Connect to the room, subscribing to audio tracks only.
      2. Wait until a participant is available.
      3. Build the realtime model and start a ``MultimodalAgent`` on the room.
      4. Add an initial user message to the first session and request a
         response to it.

    Args:
        ctx: The job context supplied by the worker; used to connect and to
            obtain the room.
    """
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # Only the waiting matters here; the returned participant is not used.
    await ctx.wait_for_participant()

    # The key is read from the environment (populated by load_dotenv() at
    # import time). It is None when OPENAI_API_KEY is not set.
    realtime_model = openai.realtime.RealtimeModel(
        instructions="You are a helpful assistant and you love open-source software",
        voice="shimmer",
        temperature=0.8,
        modalities=["audio", "text"],
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url="wss://api.openai.com/v1",
    )

    agent = MultimodalAgent(model=realtime_model)
    agent.start(ctx.room)

    # NOTE(review): assumes starting the agent has already registered a
    # session on the model, so index 0 exists — confirm against the
    # livekit-agents version in use.
    realtime_session = realtime_model.sessions[0]

    # Seed the conversation with a user turn, then ask for a response to it.
    opening_message = llm.ChatMessage(
        role="user",
        content="Please begin the interaction with the user in a manner consistent with your instructions.",
    )
    realtime_session.conversation.item.create(opening_message)
    realtime_session.response.create()
|
||||||
|
|
||||||
|
def main(livekit_url):
    """Run the multimodal worker against the LiveKit server at ``livekit_url``.

    Workers currently have to be launched through the agents CLI, so this
    function simulates running "[this file] dev": it overwrites ``sys.argv``
    with this file's path followed by ``dev`` and then hands control to
    ``cli.run_app``.

    Args:
        livekit_url: Passed to the worker as its ``ws_url``.

    Note:
        ``sys.argv`` is replaced for the whole process and is not restored.
    """
    # Pretend the process was started as "<this file> dev".
    sys.argv = [str(__file__), "dev"]

    # NOTE(review): "devkey"/"secret" look like local development credentials
    # and the port is fixed at 8082 — confirm before using outside local runs.
    worker_options = WorkerOptions(
        entrypoint_fnc=entrypoint,
        api_key="devkey",
        api_secret="secret",
        ws_url=livekit_url,
        port=8082,
    )

    # Initialize the worker with the entrypoint.
    cli.run_app(worker_options)
|
Loading…
Reference in New Issue