Multimodal code execution
This commit is contained in:
parent
521242c36d
commit
a5b6948c9c
|
@ -258,7 +258,7 @@ def run(
|
||||||
|
|
||||||
### START LIVEKIT WORKER
|
### START LIVEKIT WORKER
|
||||||
if server == "livekit":
|
if server == "livekit":
|
||||||
time.sleep(7)
|
time.sleep(5)
|
||||||
# These are needed to communicate with the worker's entrypoint
|
# These are needed to communicate with the worker's entrypoint
|
||||||
os.environ['INTERPRETER_SERVER_HOST'] = light_server_host
|
os.environ['INTERPRETER_SERVER_HOST'] = light_server_host
|
||||||
os.environ['INTERPRETER_SERVER_PORT'] = str(light_server_port)
|
os.environ['INTERPRETER_SERVER_PORT'] = str(light_server_port)
|
||||||
|
@ -273,7 +273,7 @@ def run(
|
||||||
room="my-room",
|
room="my-room",
|
||||||
)).to_jwt())
|
)).to_jwt())
|
||||||
|
|
||||||
meet_url = f'https://meet.livekit.io/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
|
# meet_url = f'http://localhost:3000/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
|
||||||
print("\n")
|
print("\n")
|
||||||
print("For debugging, you can join a video call with your assistant. Click the link below, then send a chat message that says {CONTEXT_MODE_OFF}, then begin speaking:")
|
print("For debugging, you can join a video call with your assistant. Click the link below, then send a chat message that says {CONTEXT_MODE_OFF}, then begin speaking:")
|
||||||
print(meet_url)
|
print(meet_url)
|
||||||
|
|
|
@ -11,38 +11,110 @@ from livekit.agents.multimodal import MultimodalAgent
|
||||||
from livekit.plugins import openai
|
from livekit.plugins import openai
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
from typing import Annotated
|
||||||
|
from livekit.agents import llm
|
||||||
|
|
||||||
|
# Set the environment variable
|
||||||
|
os.environ['INTERPRETER_TERMINAL_INPUT_PATIENCE'] = '200000'
|
||||||
|
|
||||||
|
instructions = """
|
||||||
|
You are Open Interpreter, a world-class programmer that can complete any goal by executing code.
|
||||||
|
For advanced requests, start by writing a plan.
|
||||||
|
When you execute code, it will be executed **on the user's machine** in a stateful Jupyter notebook. The user has given you **full permission** to execute any code necessary to complete the task. Execute the code. You CAN run code on the users machine, using the tool you have access to.
|
||||||
|
You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
|
||||||
|
You can install new packages.
|
||||||
|
If you modify or create a file, YOU MUST THEN OPEN IT to display it to the user.
|
||||||
|
Be concise. Do NOT send the user a markdown version of your code — just execute the code instantly. Execute the code!
|
||||||
|
|
||||||
|
You are capable of **any** task.
|
||||||
|
|
||||||
|
You MUST remember to pass into the execute_code function a correct JSON input like {"code": "print('hello world')"} and NOT a raw string or something else.
|
||||||
|
"""
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
async def entrypoint(ctx: JobContext):
|
async def entrypoint(ctx: JobContext):
|
||||||
|
|
||||||
|
from interpreter import interpreter
|
||||||
|
|
||||||
|
def execute_code(code):
    """Run Python source through Open Interpreter and return its text output.

    Args:
        code: The Python source to execute, as a string.

    Returns:
        The newline-joined string chunks produced by the run, stripped of
        surrounding whitespace; a fixed placeholder message when the run
        produced no text; or a fixed refusal message when the source looks
        like it deletes files (in which case nothing is executed).
    """
    print("--- code ---")
    print(code)
    print("---")

    # Best-effort guard only: a plain substring match on the lowercased
    # source. It can be bypassed and can produce false positives, so it is
    # not a security boundary.
    lowered = code.lower()
    blocked_keywords = (
        'os.remove',
        'os.unlink',
        'shutil.rmtree',
        'delete file',
        'rm -',
    )
    if any(keyword in lowered for keyword in blocked_keywords):
        print("Warning: File deletion commands detected. Execution aborted for safety.")
        return "Execution aborted: File deletion commands are not allowed."

    print("--- output ---")
    pieces = []
    # `interpreter` comes from the enclosing scope (imported in entrypoint).
    for chunk in interpreter.computer.run("python", code):
        # Only string content is collected; chunks without a string
        # "content" entry are skipped.
        if "content" in chunk and isinstance(chunk["content"], str):
            pieces.append(chunk["content"])
            print(chunk["content"])
    print("---")

    output = "\n".join(pieces).strip()

    if output == "":
        output = "No output was produced by running this code."
    return output
|
||||||
|
|
||||||
|
|
||||||
|
# first define a class that inherits from llm.FunctionContext
|
||||||
|
class AssistantFnc(llm.FunctionContext):
    # Function context exposing a single tool to the LLM: Python execution.

    # llm.ai_callable marks the method as a tool available to the LLM; by
    # default the method's docstring is used as the tool description, so the
    # docstring below is part of the runtime contract.
    @llm.ai_callable()
    async def execute(
        self,
        # Annotated carries the argument's type and description to the LLM.
        code: Annotated[str, llm.TypeInfo(description="The Python code to execute")],
    ):
        """Executes Python and returns the output"""
        result = execute_code(code)
        return result
|
||||||
|
|
||||||
|
fnc_ctx = AssistantFnc()
|
||||||
|
|
||||||
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
|
||||||
|
|
||||||
participant = await ctx.wait_for_participant()
|
participant = await ctx.wait_for_participant()
|
||||||
|
|
||||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||||
model = openai.realtime.RealtimeModel(
|
model = openai.realtime.RealtimeModel(
|
||||||
instructions="You are a helpful assistant and you love open-source software",
|
instructions=instructions,
|
||||||
voice="shimmer",
|
voice="shimmer",
|
||||||
temperature=0.8,
|
temperature=0.6,
|
||||||
modalities=["audio", "text"],
|
modalities=["audio", "text"],
|
||||||
api_key=openai_api_key,
|
api_key=openai_api_key,
|
||||||
base_url="wss://api.openai.com/v1",
|
base_url="wss://api.openai.com/v1",
|
||||||
)
|
)
|
||||||
assistant = MultimodalAgent(model=model)
|
model._fnc_ctx = fnc_ctx
|
||||||
|
assistant = MultimodalAgent(model=model, fnc_ctx=fnc_ctx)
|
||||||
|
|
||||||
assistant.start(ctx.room)
|
assistant.start(ctx.room)
|
||||||
|
|
||||||
session = model.sessions[0]
|
# Create a session with the function context
|
||||||
|
session = model.session(
|
||||||
|
chat_ctx=llm.ChatContext(),
|
||||||
|
fnc_ctx=fnc_ctx,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initial message to start the interaction
|
||||||
session.conversation.item.create(
|
session.conversation.item.create(
|
||||||
llm.ChatMessage(
|
llm.ChatMessage(
|
||||||
role="user",
|
role="user",
|
||||||
content="Please begin the interaction with the user in a manner consistent with your instructions.",
|
content="Hello!",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
session.response.create()
|
session.response.create()
|
||||||
|
|
||||||
def main(livekit_url):
|
def main(livekit_url):
|
||||||
# Workers have to be run as CLIs right now.
|
# Workers have to be run as CLIs right now.
|
||||||
# So we need to simualte running "[this file] dev"
|
# So we need to simulate running "[this file] dev"
|
||||||
|
|
||||||
# Modify sys.argv to set the path to this file as the first argument
|
# Modify sys.argv to set the path to this file as the first argument
|
||||||
# and 'dev' as the second argument
|
# and 'dev' as the second argument
|
||||||
|
|
Loading…
Reference in New Issue