Multimodal code execution
This commit is contained in:
parent
521242c36d
commit
a5b6948c9c
|
@ -258,7 +258,7 @@ def run(
|
|||
|
||||
### START LIVEKIT WORKER
|
||||
if server == "livekit":
|
||||
time.sleep(7)
|
||||
time.sleep(5)
|
||||
# These are needed to communicate with the worker's entrypoint
|
||||
os.environ['INTERPRETER_SERVER_HOST'] = light_server_host
|
||||
os.environ['INTERPRETER_SERVER_PORT'] = str(light_server_port)
|
||||
|
@ -273,7 +273,7 @@ def run(
|
|||
room="my-room",
|
||||
)).to_jwt())
|
||||
|
||||
meet_url = f'https://meet.livekit.io/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
|
||||
# meet_url = f'http://localhost:3000/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
|
||||
print("\n")
|
||||
print("For debugging, you can join a video call with your assistant. Click the link below, then send a chat message that says {CONTEXT_MODE_OFF}, then begin speaking:")
|
||||
print(meet_url)
|
||||
|
|
|
@ -11,38 +11,110 @@ from livekit.agents.multimodal import MultimodalAgent
|
|||
from livekit.plugins import openai
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import time
|
||||
from typing import Annotated
|
||||
from livekit.agents import llm
|
||||
|
||||
# Set the environment variable
|
||||
os.environ['INTERPRETER_TERMINAL_INPUT_PATIENCE'] = '200000'
|
||||
|
||||
instructions = """
|
||||
You are Open Interpreter, a world-class programmer that can complete any goal by executing code.
|
||||
For advanced requests, start by writing a plan.
|
||||
When you execute code, it will be executed **on the user's machine** in a stateful Jupyter notebook. The user has given you **full permission** to execute any code necessary to complete the task. Execute the code. You CAN run code on the users machine, using the tool you have access to.
|
||||
You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
|
||||
You can install new packages.
|
||||
If you modify or create a file, YOU MUST THEN OPEN IT to display it to the user.
|
||||
Be concise. Do NOT send the user a markdown version of your code — just execute the code instantly. Execute the code!
|
||||
|
||||
You are capable of **any** task.
|
||||
|
||||
You MUST remember to pass into the execute_code function a correct JSON input like {"code": "print('hello world')"} and NOT a raw string or something else.
|
||||
"""
|
||||
|
||||
load_dotenv()
|
||||
|
||||
async def entrypoint(ctx: JobContext):
|
||||
|
||||
from interpreter import interpreter
|
||||
|
||||
def execute_code(code):
    """Run Python code through Open Interpreter and return its text output.

    The code is echoed to stdout, screened for file deletion commands, and
    then passed to ``interpreter.computer.run("python", code)``. Every chunk
    whose ``"content"`` value is a string is printed and collected.

    Args:
        code: Python source to execute.

    Returns:
        The collected chunk contents joined by newlines and stripped; a fixed
        notice when no output was produced; or an "Execution aborted" message
        when a deletion command is detected (nothing is executed in that case).
    """
    # Function-scope import so this block does not depend on the module's
    # top-level import list.
    import re

    print("--- code ---")
    print(code)
    print("---")
    #time.sleep(2)

    # Check if the code contains any file deletion commands.
    # The lookbehind rejects a preceding letter or digit, so identifiers that
    # merely END in "os" / "rm" (e.g. ``pos.remove(x)`` or ``norm - 1``) are no
    # longer mistaken for ``os.remove`` / ``rm -``. Matching stays
    # case-insensitive by lowering the code first, as before.
    deletion_pattern = (
        r"(?<![a-z0-9])os\.(?:remove|unlink)"
        r"|shutil\.rmtree"
        r"|delete file"
        r"|(?<![a-z0-9])rm -"
    )
    if re.search(deletion_pattern, code.lower()):
        print("Warning: File deletion commands detected. Execution aborted for safety.")
        return "Execution aborted: File deletion commands are not allowed."

    print("--- output ---")
    pieces = []
    for chunk in interpreter.computer.run("python", code):
        content = chunk.get("content") if "content" in chunk else None
        # Only string content is reported; other content types are skipped.
        if isinstance(content, str):
            pieces.append(content)
            print(content)
    print("---")

    output = "\n".join(pieces).strip()

    if output == "":
        output = "No output was produced by running this code."
    return output
|
||||
|
||||
|
||||
# first define a class that inherits from llm.FunctionContext
|
||||
class AssistantFnc(llm.FunctionContext):
    # Function context whose decorated methods are offered to the LLM as tools.

    # llm.ai_callable registers the method below as a tool the LLM can call.
    # Unless overridden, the method's docstring serves as the tool description,
    # so the docstring text is part of what the model sees.
    @llm.ai_callable()
    async def execute(
        self,
        # Annotated exposes both the argument's type and its description to the LLM.
        code: Annotated[str, llm.TypeInfo(description="The Python code to execute")],
    ):
        """Executes Python and returns the output"""
        # Hands the code to the enclosing execute_code helper and returns its result.
        return execute_code(code)
|
||||
|
||||
fnc_ctx = AssistantFnc()
|
||||
|
||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
||||
|
||||
participant = await ctx.wait_for_participant()
|
||||
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
model = openai.realtime.RealtimeModel(
|
||||
instructions="You are a helpful assistant and you love open-source software",
|
||||
instructions=instructions,
|
||||
voice="shimmer",
|
||||
temperature=0.8,
|
||||
temperature=0.6,
|
||||
modalities=["audio", "text"],
|
||||
api_key=openai_api_key,
|
||||
base_url="wss://api.openai.com/v1",
|
||||
)
|
||||
assistant = MultimodalAgent(model=model)
|
||||
model._fnc_ctx = fnc_ctx
|
||||
assistant = MultimodalAgent(model=model, fnc_ctx=fnc_ctx)
|
||||
|
||||
assistant.start(ctx.room)
|
||||
|
||||
session = model.sessions[0]
|
||||
# Create a session with the function context
|
||||
session = model.session(
|
||||
chat_ctx=llm.ChatContext(),
|
||||
fnc_ctx=fnc_ctx,
|
||||
)
|
||||
|
||||
# Initial message to start the interaction
|
||||
session.conversation.item.create(
|
||||
llm.ChatMessage(
|
||||
role="user",
|
||||
content="Please begin the interaction with the user in a manner consistent with your instructions.",
|
||||
content="Hello!",
|
||||
)
|
||||
)
|
||||
session.response.create()
|
||||
|
||||
def main(livekit_url):
|
||||
# Workers have to be run as CLIs right now.
|
||||
# So we need to simualte running "[this file] dev"
|
||||
# So we need to simulate running "[this file] dev"
|
||||
|
||||
# Modify sys.argv to set the path to this file as the first argument
|
||||
# and 'dev' as the second argument
|
||||
|
|
Loading…
Reference in New Issue