process custom flags on worker

parent 197417a65b
commit 12efc95688
@@ -4090,67 +4090,69 @@ sympy = "*"
 [[package]]
 name = "open-interpreter"
-version = "0.3.12"
+version = "0.3.13"
 description = "Let language models run code"
 optional = false
-python-versions = "<4,>=3.9"
-files = [
-    {file = "open_interpreter-0.3.12-py3-none-any.whl", hash = "sha256:0e155568685ea927c2b4b8139e4ce9bdf03ee0f8f5a71030f07988f72b305721"},
-    {file = "open_interpreter-0.3.12.tar.gz", hash = "sha256:ab3ebde071ab19812513880be3dedcccd7c545dbea1b8a31bd054ef6332acbf6"},
-]
+python-versions = ">=3.9,<4"
+files = []
+develop = false
 
 [package.dependencies]
-astor = ">=0.8.1,<0.9.0"
-fastapi = {version = ">=0.111.0,<0.112.0", optional = true, markers = "extra == \"server\""}
-git-python = ">=1.0.3,<2.0.0"
-google-generativeai = ">=0.7.1,<0.8.0"
-html2image = ">=2.0.4.3,<3.0.0.0"
-html2text = ">=2024.2.26,<2025.0.0"
-inquirer = ">=3.1.3,<4.0.0"
-ipykernel = ">=6.26.0,<7.0.0"
-ipywidgets = {version = ">=8.1.2,<9.0.0", optional = true, markers = "extra == \"os\""}
-janus = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"server\""}
-jupyter-client = ">=8.6.0,<9.0.0"
-litellm = ">=1.41.26,<2.0.0"
-matplotlib = ">=3.8.2,<4.0.0"
-nltk = ">=3.8.1,<4.0.0"
-opencv-python = {version = ">=4.8.1.78,<5.0.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
-platformdirs = ">=4.2.0,<5.0.0"
-plyer = {version = ">=2.1.0,<3.0.0", optional = true, markers = "extra == \"os\""}
-psutil = ">=5.9.6,<6.0.0"
-pyautogui = {version = ">=0.9.54,<0.10.0", optional = true, markers = "extra == \"os\""}
-pydantic = ">=2.6.4,<3.0.0"
-pyperclip = ">=1.9.0,<2.0.0"
-pyreadline3 = {version = ">=3.4.1,<4.0.0", markers = "sys_platform == \"win32\""}
-pytesseract = {version = ">=0.3.10,<0.4.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
-pywinctl = {version = ">=0.3,<0.4", optional = true, markers = "extra == \"os\""}
-pyyaml = ">=6.0.1,<7.0.0"
-rich = ">=13.4.2,<14.0.0"
-screeninfo = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"os\""}
-selenium = ">=4.24.0,<5.0.0"
-send2trash = ">=1.8.2,<2.0.0"
-sentence-transformers = {version = ">=2.5.1,<3.0.0", optional = true, markers = "extra == \"os\""}
+astor = "^0.8.1"
+fastapi = {version = "^0.111.0", optional = true}
+git-python = "^1.0.3"
+google-generativeai = "^0.7.1"
+html2image = "^2.0.4.3"
+html2text = "^2024.2.26"
+inquirer = "^3.1.3"
+ipykernel = "^6.26.0"
+ipywidgets = {version = "^8.1.2", optional = true}
+janus = {version = "^1.0.0", optional = true}
+jupyter-client = "^8.6.0"
+litellm = "^1.41.26"
+matplotlib = "^3.8.2"
+opencv-python = {version = "^4.8.1.78", optional = true}
+platformdirs = "^4.2.0"
+plyer = {version = "^2.1.0", optional = true}
+psutil = "^5.9.6"
+pyautogui = {version = "^0.9.54", optional = true}
+pydantic = "^2.6.4"
+pyperclip = "^1.9.0"
+pyreadline3 = {version = "^3.4.1", markers = "sys_platform == \"win32\""}
+pytesseract = {version = "^0.3.10", optional = true}
+pywinctl = {version = "^0.3", optional = true}
+pyyaml = "^6.0.1"
+rich = "^13.4.2"
+screeninfo = {version = "^0.8.1", optional = true}
+selenium = "^4.24.0"
+send2trash = "^1.8.2"
+sentence-transformers = {version = "^2.5.1", optional = true}
 setuptools = "*"
-shortuuid = ">=1.0.13,<2.0.0"
-six = ">=1.16.0,<2.0.0"
-starlette = ">=0.37.2,<0.38.0"
-tiktoken = ">=0.7.0,<0.8.0"
-timm = {version = ">=0.9.16,<0.10.0", optional = true, markers = "extra == \"os\""}
-tokentrim = ">=0.1.13,<0.2.0"
-toml = ">=0.10.2,<0.11.0"
-torch = {version = ">=2.2.1,<3.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
-typer = ">=0.12.4,<0.13.0"
-uvicorn = {version = ">=0.30.1,<0.31.0", optional = true, markers = "extra == \"server\""}
-webdriver-manager = ">=4.0.2,<5.0.0"
-wget = ">=3.2,<4.0"
-yaspin = ">=3.0.2,<4.0.0"
+shortuuid = "^1.0.13"
+six = "^1.16.0"
+starlette = "^0.37.2"
+tiktoken = "^0.7.0"
+timm = {version = "^0.9.16", optional = true}
+tokentrim = "^0.1.13"
+toml = "^0.10.2"
+typer = "^0.12.4"
+uvicorn = {version = "^0.30.1", optional = true}
+webdriver-manager = "^4.0.2"
+wget = "^3.2"
+yaspin = "^3.0.2"
 
 [package.extras]
 local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"]
-os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)", "torch (>=2.2.1,<3.0.0)"]
+os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)"]
 safe = ["semgrep (>=1.52.0,<2.0.0)"]
 server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"]
 
+[package.source]
+type = "git"
+url = "https://github.com/openinterpreter/open-interpreter.git"
+reference = "development"
+resolved_reference = "bd24acd89d3caf113a14109106952de2c793432f"
+
 [[package]]
 name = "openai"
 version = "1.36.1"
@@ -11078,4 +11080,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "5532f1b0732f4c73e30db78b717afb0fd5cf327f1bf093f073cec113428cc4b9"
+content-hash = "7c40eca9d5b84b65894ddef258e1d7237d1698a62db1a68ee42871fe20d18f6f"
@@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.8.1"
 livekit-plugins-silero = "^0.6.4"
 livekit-plugins-elevenlabs = "^0.7.3"
 segno = "^1.6.1"
-open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package
+open-interpreter = {git = "https://github.com/openinterpreter/open-interpreter.git", rev = "development", extras = ["os", "server"]}
 ngrok = "^1.4.0"
 realtimetts = {extras = ["all"], version = "^0.4.5"}
 realtimestt = "^0.2.41"
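The two build-file hunks above work together: the dependency (in what appears to be pyproject.toml, given the caret-style constraints) now tracks the development branch of open-interpreter instead of the PyPI 0.3.12 release, and the lock entry pins it to commit bd24acd8 with a regenerated content-hash. A quick sanity check after running "poetry install" is to read the installed distribution metadata; this snippet is illustrative and not part of the commit:

# Sketch: confirm the git build resolved by the lock file is what's installed.
# Assumes the project's Poetry environment is active.
import importlib.metadata

print(importlib.metadata.version("open-interpreter"))  # expect "0.3.13"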
@@ -1,21 +1,130 @@
 import asyncio
 import copy
 import os
+import re
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
-from livekit.agents.llm import ChatContext, ChatMessage
+from livekit.agents.transcription import STTSegmentsForwarder
+from livekit.agents.llm import ChatContext, ChatMessage, LLMStream, ChatChunk, ChoiceDelta, Choice
 from livekit import rtc
 from livekit.agents.voice_assistant import VoiceAssistant
 from livekit.plugins import deepgram, openai, silero, elevenlabs
 from dotenv import load_dotenv
 import sys
 import numpy as np
+from typing import AsyncIterator
 load_dotenv()
 
 start_message = """Hi! You can hold the white circle below to speak to me.
 
 Try asking what I can do."""
 
+
+class ProcessedLLMStream(LLMStream):
+    def __init__(
+        self,
+        original_stream: LLMStream,
+        regex_pattern: str = r'<unvoiced code="([^"]+)"></unvoiced>',
+    ) -> None:
+        super().__init__(chat_ctx=original_stream.chat_ctx, fnc_ctx=original_stream.fnc_ctx)
+        self.original_stream = original_stream
+        self.regex_pattern = regex_pattern
+        self.init_match = '<.*?' # match for the '<' and any characters to the left of it
+        self.accumulating = False
+        self._aiter = self._process_stream()
+        self._buffer = ""
+
+    async def _process_stream(self) -> AsyncIterator[ChatChunk]:
+        async for chunk in self.original_stream:
+            new_choices = []
+            for choice in chunk.choices:
+                content = choice.delta.content
+
+                if content:
+                    init_match = re.search(self.init_match, content)
+                    if init_match:
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        print("INITIAL MATCH FOUND!!!!!!")
+                        self.accumulating = True
+                    if self.accumulating:
+                        self._buffer += content
+                        print("ACCUMULATING BUFFER!!!")
+                        print("ACCUMULATING BUFFER!!!")
+                        print("ACCUMULATING BUFFER!!!")
+                        print("ACCUMULATING BUFFER!!!")
+                        print("ACCUMULATING BUFFER!!!")
+                        print("ACCUMULATING BUFFER!!!")
+                        match = re.search(self.regex_pattern, self._buffer)
+                        if match:
+                            code = match.group(1)
+                            print(f"Extracted Code: {code}")
+
+                            # Create a confirmation message
+                            confirmation_msg = ChatMessage(
+                                role="assistant",
+                                content=f"Code extracted: {code}",
+                            )
+
+                            # Wrap the confirmation message in ChoiceDelta and Choice
+                            choice_delta = ChoiceDelta(
+                                role=confirmation_msg.role,
+                                content=str(confirmation_msg.content) # we know confirmation_msg.content is a string
+                            )
+                            new_choice = Choice(
+                                delta=choice_delta,
+                                index=choice.index
+                            )
+
+                            # Create a new ChatChunk with the confirmation Choice
+                            confirmation_chunk = ChatChunk(choices=[new_choice])
+
+                            # Yield the confirmation chunk
+                            yield confirmation_chunk
+                            self.accumulating = False
+                            self._buffer = ""
+                            continue # Skip yielding the original content
+                new_choices.append(choice)
+            if new_choices:
+                yield ChatChunk(choices=new_choices)
+
+    async def __anext__(self) -> ChatChunk:
+        try:
+            return await self._aiter.__anext__()
+        except StopAsyncIteration:
+            await self.aclose()
+            raise
+
+
+def _01_synthesize_assistant_reply(
+    assistant: VoiceAssistant, chat_ctx: ChatContext
+) -> LLMStream:
+    """
+    Custom function to process the OpenAI compatible endpoint's output.
+    Extracts code from responses matching the <unvoiced code=...></unvoiced> pattern.
+
+    Args:
+        assistant (VoiceAssistant): The VoiceAssistant instance.
+        chat_ctx (ChatContext): The current chat context.
+
+    Returns:
+        LLMStream: The processed LLMStream.
+    """
+    llm_stream = assistant.llm.chat(chat_ctx=chat_ctx, fnc_ctx=assistant.fnc_ctx)
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
+
+    return ProcessedLLMStream(original_stream=llm_stream)
+
 # This function is the entrypoint for the agent.
 async def entrypoint(ctx: JobContext):
     # Create an initial chat context with a system prompt
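The core of ProcessedLLMStream is a small accumulator: any '<' in a streamed delta (the loose '<.*?' trigger) starts buffering, and once the buffer contains a complete <unvoiced code="..."></unvoiced> tag, the code attribute is extracted and a confirmation chunk is yielded in place of the raw tag. The sketch below isolates that logic outside LiveKit; the simulated deltas are invented for illustration:

import re

PATTERN = r'<unvoiced code="([^"]+)"></unvoiced>'

# Hypothetical streamed deltas: the tag arrives split across chunk boundaries.
deltas = ['Sure, on it. <unvoiced co', 'de="print(', '42)"></unvoi', 'ced> Done.']

buffer, accumulating = "", False
for content in deltas:
    if re.search(r'<.*?', content):  # the same loose trigger the class uses
        accumulating = True
    if accumulating:
        buffer += content
        match = re.search(PATTERN, buffer)
        if match:
            print("Extracted Code:", match.group(1))  # -> print(42)
            buffer, accumulating = "", False

One caveat the sketch makes visible: the '<.*?' trigger fires on any '<', so ordinary prose containing one would also start buffering, and with no closing tag the buffer simply grows until the stream ends.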
@@ -96,6 +205,7 @@ async def entrypoint(ctx: JobContext):
         llm=open_interpreter, # Language Model
         tts=tts, # Text-to-Speech
         chat_ctx=initial_ctx, # Chat history context
+        # will_synthesize_assistant_reply=_01_synthesize_assistant_reply,
     )
 
     chat = rtc.ChatManager(ctx.room)
@@ -118,13 +228,26 @@
 
     await asyncio.sleep(1)
 
+    print("HELLO FROM INSIDE THE WORKER")
+    print("HELLO FROM INSIDE THE WORKER")
+    print("HELLO FROM INSIDE THE WORKER")
+    print("HELLO FROM INSIDE THE WORKER")
+    print("HELLO FROM INSIDE THE WORKER")
+
     # Greets the user with an initial message
     await assistant.say(start_message,
                         allow_interruptions=True)
 
+    stt_forwarder = STTSegmentsForwarder(room=ctx.room, participant=ctx.room.local_participant)
+    await stt_forwarder._run()
+
 
 def main(livekit_url):
+    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
+    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
+    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
+    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
+    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
     # Workers have to be run as CLIs right now.
     # So we need to simualte running "[this file] dev"
@@ -134,5 +257,5 @@ def main(livekit_url):
 
     # Initialize the worker with the entrypoint
     cli.run_app(
-        WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url)
+        WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url, port=8082)
     )
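The last hunk pins the worker's port to 8082 rather than the library default, presumably so it does not collide with another service on the same host. Since workers currently run as CLIs (hence the sys.argv shim the comments describe), launching it might look like the sketch below; the file name and the local LiveKit URL are assumptions, not part of the commit:

# worker.py (name assumed): start the agent worker against a local LiveKit server.
if __name__ == "__main__":
    main("ws://localhost:7880")  # LiveKit's default dev URL; substitute your own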