Cartesia
parent a5b6948c9c
commit befddaf205
@@ -258,7 +258,7 @@ def run(
     ### START LIVEKIT WORKER
     if server == "livekit":
-        time.sleep(5)
+        time.sleep(1)
         # These are needed to communicate with the worker's entrypoint
         os.environ['INTERPRETER_SERVER_HOST'] = light_server_host
         os.environ['INTERPRETER_SERVER_PORT'] = str(light_server_port)
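The two environment variables exported above are how run() hands the light server's address to the LiveKit worker process. A minimal sketch of the consuming side, assuming the entrypoint simply reads the values back from the environment (the variable names other than the two env vars, and the defaults, are placeholders, not taken from this commit):

    # Sketch only: how the worker entrypoint could pick up the values set above.
    import os

    interpreter_server_host = os.getenv("INTERPRETER_SERVER_HOST", "localhost")
    interpreter_server_port = int(os.getenv("INTERPRETER_SERVER_PORT", "8000"))
    ws_url = f"ws://{interpreter_server_host}:{interpreter_server_port}"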
@@ -274,6 +274,7 @@ def run(
         )).to_jwt())

         # meet_url = f'http://localhost:3000/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
+        meet_url = f'https://meet.livekit.io/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
         print("\n")
         print("For debugging, you can join a video call with your assistant. Click the link below, then send a chat message that says {CONTEXT_MODE_OFF}, then begin speaking:")
         print(meet_url)
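The `)).to_jwt())` context line is the tail of the access-token construction that the new debug URL embeds. For orientation, a hedged sketch of how such a token is typically built with the livekit-api package; the key names, identity, and room below are placeholders and are not taken from this commit:

    import os
    from livekit import api  # livekit-api package (assumed to be how the token above is built)

    token = (
        api.AccessToken(os.getenv("LIVEKIT_API_KEY"), os.getenv("LIVEKIT_API_SECRET"))
        .with_identity("01-user")                                       # placeholder identity
        .with_grants(api.VideoGrants(room_join=True, room="my-room"))   # placeholder room
        .to_jwt()
    )
    # `token` is what the meet_url in the hunk above embeds for the debug call.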
(One file's diff is suppressed because it is too large.)
@@ -26,6 +26,7 @@ realtimestt = "^0.2.41"
 pynput = "^1.7.7"
 yaspin = "^3.0.2"
 pywebview = "^5.2"
+livekit-plugins-cartesia = "^0.4.2"

 [build-system]
 requires = ["poetry-core"]
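The only change here is the new livekit-plugins-cartesia dependency in pyproject.toml. A quick, hypothetical smoke test after installing it (the import matches the worker change further down; nothing beyond that is assumed):

    # Verify the plugin resolves before starting the worker (sketch only).
    from livekit.plugins import cartesia

    print(cartesia.TTS)  # the TTS class provided by livekit-plugins-cartesia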
@@ -5,7 +5,7 @@ from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
 from livekit.agents.llm import ChatContext, ChatMessage
 from livekit import rtc
 from livekit.agents.voice_assistant import VoiceAssistant
-from livekit.plugins import deepgram, openai, silero, elevenlabs
+from livekit.plugins import deepgram, openai, silero, elevenlabs, cartesia
 from dotenv import load_dotenv
 import sys
 import numpy as np
@@ -81,7 +81,7 @@ async def entrypoint(ctx: JobContext):
     elif tts_provider == 'elevenlabs':
         tts = elevenlabs.TTS()
     elif tts_provider == 'cartesia':
-        pass # import plugin, TODO support this
+        tts = cartesia.TTS()
     else:
         raise ValueError(f"Unsupported TTS provider: {tts_provider}. Please set 01_TTS environment variable to 'openai' or 'elevenlabs'.")

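With the cartesia plugin imported above, the provider branch is now complete. A hedged sketch of how the whole selection presumably reads after this hunk; the 01_TTS lookup is inferred from the error message, and only the branch bodies are taken from the diff:

    import os
    from livekit.plugins import openai, elevenlabs, cartesia

    tts_provider = os.getenv("01_TTS", "").lower()   # assumed lookup, per the error text

    if tts_provider == "openai":
        tts = openai.TTS()
    elif tts_provider == "elevenlabs":
        tts = elevenlabs.TTS()
    elif tts_provider == "cartesia":
        tts = cartesia.TTS()        # the branch wired up in this commit
    else:
        raise ValueError(f"Unsupported TTS provider: {tts_provider}.")

Note that the ValueError message in the unchanged context line still only advertises 'openai' or 'elevenlabs', even though 'cartesia' is now accepted.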
@@ -1,7 +1,7 @@
 from interpreter import AsyncInterpreter
 interpreter = AsyncInterpreter()

-interpreter.tts = "elevenlabs" # This should be cartesia once we support it
+interpreter.tts = "cartesia"
 interpreter.stt = "deepgram" # This is only used for the livekit server. The light server runs faster-whisper locally

 # Connect your 01 to a language model
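Both profile files get the same switch from elevenlabs to cartesia. A hypothetical variant, not part of this commit, that falls back to elevenlabs when no Cartesia credentials are present; CARTESIA_API_KEY is an assumed variable name:

    import os
    from interpreter import AsyncInterpreter

    interpreter = AsyncInterpreter()
    # Assumed env var name; fall back to elevenlabs if Cartesia isn't configured.
    interpreter.tts = "cartesia" if os.getenv("CARTESIA_API_KEY") else "elevenlabs"
    interpreter.stt = "deepgram"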
@@ -1,7 +1,7 @@
 from interpreter import AsyncInterpreter
 interpreter = AsyncInterpreter()

-interpreter.tts = "elevenlabs" # This should be cartesia once we support it
+interpreter.tts = "cartesia" # This should be cartesia once we support it
 interpreter.stt = "deepgram" # This is only used for the livekit server. The light server runs faster-whisper locally

 interpreter.llm.model = "gpt-4o-mini"