update docs and remove comments

parent d162ee69a3 · commit 564255adee
@@ -127,7 +127,9 @@ If you want to run local speech-to-text using Whisper, you must install Rust. Fo
 
 ## Customizations
 
-To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in `i.py`. This file sets up an interpreter, and is powered by Open Interpreter.
+To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in the `profiles` directory under the `server` directory. This file sets up an interpreter, and is powered by Open Interpreter.
+
+To specify the text-to-speech service for the 01 `base_device.py`, set `interpreter.tts` to either "openai" for OpenAI, "elevenlabs" for ElevenLabs, or "coqui" for Coqui (local) in a profile.
 
 ## Ubuntu Dependencies
 
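For illustration, a minimal profile module might look like the sketch below. The file name is hypothetical; the attributes shown (`interpreter.tts`, `interpreter.llm.model`) are the ones that appear in the profiles touched by this commit.

# server/profiles/my_profile.py  (hypothetical file name)
from interpreter import interpreter

# TTS provider: "openai" (OpenAI), "elevenlabs" (ElevenLabs), or "coqui" (local)
interpreter.tts = "openai"

# Language model served by this profile
interpreter.llm.model = "gpt-4-turbo"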
@@ -91,7 +91,6 @@ class Device:
         self.server_url = ""
         self.ctrl_pressed = False
         self.tts_service = ""
-        self.playback_latency = None
 
     def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
         """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@@ -165,10 +164,6 @@ class Device:
         while True:
             try:
                 audio = await self.audiosegments.get()
-                if self.playback_latency and isinstance(audio, bytes):
-                    elapsed_time = time.time() - self.playback_latency
-                    print(f"Time from request to playback: {elapsed_time} seconds")
-                    self.playback_latency = None
 
                 if self.tts_service == "elevenlabs":
                     mpv_process.stdin.write(audio)  # type: ignore
@@ -224,7 +219,6 @@ class Device:
         stream.stop_stream()
         stream.close()
         print("Recording stopped.")
-        self.playback_latency = time.time()
 
         duration = wav_file.getnframes() / RATE
         if duration < 0.3:
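The Device hunks above strip the ad-hoc playback-latency instrumentation (the `self.playback_latency` attribute, the timestamp set when recording stops, and the elapsed-time print). If the measurement is ever needed again, one option is to keep it behind an opt-in flag rather than deleting it; a minimal sketch, not part of this commit (the environment-variable name is hypothetical):

import os
import time

DEBUG_LATENCY = os.getenv("O1_DEBUG_LATENCY") == "1"  # hypothetical opt-in flag

class LatencyTimer:
    """Measures time from request to playback; prints only when enabled."""

    def __init__(self):
        self.start = None

    def mark_request(self):
        if DEBUG_LATENCY:
            self.start = time.time()

    def mark_playback(self):
        if DEBUG_LATENCY and self.start is not None:
            print(f"Time from request to playback: {time.time() - self.start} seconds")
            self.start = None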
@@ -22,11 +22,6 @@ import os
 
 class AsyncInterpreter:
     def __init__(self, interpreter):
-        self.stt_latency = None
-        self.tts_latency = None
-        self.interpreter_latency = None
-        self.time_from_first_yield_to_first_put = None
-
         self.interpreter = interpreter
 
         # STT
@@ -128,9 +123,7 @@ class AsyncInterpreter:
 
                 # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
                 # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
-                print("yielding ", content)
-                if self.time_from_first_yield_to_first_put is None:
-                    self.time_from_first_yield_to_first_put = time.time()
+                # print("yielding ", content)
 
                 yield content
 
@@ -162,9 +155,6 @@ class AsyncInterpreter:
         )
 
         # Send a completion signal
-        end_interpreter = time.time()
-        self.interpreter_latency = end_interpreter - start_interpreter
-        print("INTERPRETER LATENCY", self.interpreter_latency)
         # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
 
     async def run(self):
@@ -179,11 +169,7 @@ class AsyncInterpreter:
         while not self._input_queue.empty():
             input_queue.append(self._input_queue.get())
 
-        start_stt = time.time()
         message = self.stt.text()
-        end_stt = time.time()
-        self.stt_latency = end_stt - start_stt
-        print("STT LATENCY", self.stt_latency)
 
         print(message)
 
@@ -210,23 +196,11 @@ class AsyncInterpreter:
                     "end": True,
                 }
             )
-            end_tts = time.time()
-            self.tts_latency = end_tts - self.tts.stream_start_time
-            print("TTS LATENCY", self.tts_latency)
             self.tts.stop()
             break
 
     async def _on_tts_chunk_async(self, chunk):
-        print("adding chunk to queue")
-        if (
-            self.time_from_first_yield_to_first_put is not None
-            and self.time_from_first_yield_to_first_put != 0
-        ):
-            print(
-                "time from first yield to first put is ",
-                time.time() - self.time_from_first_yield_to_first_put,
-            )
-            self.time_from_first_yield_to_first_put = 0
+        # print("adding chunk to queue")
         await self._add_to_queue(self._output_queue, chunk)
 
     def on_tts_chunk(self, chunk):
@@ -234,8 +208,5 @@ class AsyncInterpreter:
         asyncio.run(self._on_tts_chunk_async(chunk))
 
     async def output(self):
-        print("outputting chunks")
+        # print("outputting chunks")
         return await self._output_queue.get()
-
-    def shutdown(self):
-        self.stt.shutdown()
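The AsyncInterpreter hunks above delete the STT/TTS/interpreter latency bookkeeping and comment out the remaining debug prints. As an alternative to commenting prints out by hand, debug output can be routed through the standard `logging` module and silenced by default; a sketch under that assumption (the logger name is hypothetical):

import logging

logger = logging.getLogger("01.server")  # hypothetical logger name

def debug_yield(content: str) -> None:
    # Emitted only when logging is configured at DEBUG level,
    # e.g. logging.basicConfig(level=logging.DEBUG)
    logger.debug("yielding %s", content)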
@@ -1,9 +1,13 @@
-# TODO: import from the profiles directory the interpreter that should be served!!
+# import from the profiles directory the interpreter to be served
 
-from .profiles.fast import interpreter as base_interpreter
+# add other profiles to the directory to define other interpreter instances and import them here
+# {.profiles.fast: optimizes for STT/TTS latency with the fastest models }
+# {.profiles.local: uses local models and local STT/TTS }
+# {.profiles.default: uses default interpreter settings with optimized TTS latency }
 
+# from .profiles.fast import interpreter as base_interpreter
 # from .profiles.local import interpreter as base_interpreter
-# from .profiles.default import interpreter as base_interpreter
+from .profiles.default import interpreter as base_interpreter
 
 import asyncio
 import traceback
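The new comments tell readers to add profile modules to the directory and swap the import line by hand. A runtime alternative, purely illustrative and not part of this commit, would be to resolve the profile by name with `importlib`, assuming each profile module exposes an `interpreter` attribute as the ones in this diff do:

import importlib

def load_profile(name: str = "default"):
    """Return the interpreter defined by profiles/<name>.py (hypothetical helper)."""
    module = importlib.import_module(f".profiles.{name}", package=__package__)
    return module.interpreter

base_interpreter = load_profile("default")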
@@ -1,3 +1,5 @@
+# tests currently hang after completion
+
 """
 import pytest
 import signal
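The added comment records that the tests currently hang after completion. Until the hang itself is fixed, a common mitigation is the third-party pytest-timeout plugin, which fails a test instead of letting it block forever; a sketch, assuming the plugin is installed (pip install pytest-timeout, run with pytest --timeout=60):

import pytest

@pytest.mark.timeout(60)  # fail after 60 seconds instead of hanging
def test_server_smoke():
    # hypothetical placeholder test body
    assert True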
@@ -3,9 +3,9 @@ from interpreter import interpreter
 # This is an Open Interpreter compatible profile.
 # Visit https://01.openinterpreter.com/profile for all options.
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "openai"
+interpreter.tts = "elevenlabs"
 
 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"
@@ -3,7 +3,7 @@ from interpreter import interpreter
 # This is an Open Interpreter compatible profile.
 # Visit https://01.openinterpreter.com/profile for all options.
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
 interpreter.tts = "elevenlabs"
 
@@ -16,27 +16,9 @@ interpreter.llm.context_window = 2048
 interpreter.llm.max_tokens = 4096
 interpreter.llm.temperature = 0.8
 
-# interpreter.llm.api_key = os.environ["GROQ_API_KEY"]
-
 interpreter.computer.import_computer_api = False
 
 interpreter.auto_run = True
 interpreter.system_message = (
     "You are a helpful assistant that can answer questions and help with tasks."
 )
-
-# TODO: include other options in comments in the profiles for tts
-# direct people to the profiles directory to make changes to the interpreter profile
-# this should be made explicit on the docs
-
-"""
-llm_service: str = "litellm",
-model: str = "gpt-4",
-llm_supports_vision: bool = False,
-llm_supports_functions: bool = False,
-context_window: int = 2048,
-max_tokens: int = 4096,
-temperature: float = 0.8,
-tts_service: str = "elevenlabs",
-stt_service: str = "openai",
-"""
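The hunk above deletes a dead docstring that listed the old constructor defaults. For reference, those same defaults could be kept as a typed record rather than as dead comments; a hypothetical sketch, not part of this commit:

from dataclasses import dataclass

@dataclass
class ProfileDefaults:
    # values copied from the removed docstring
    llm_service: str = "litellm"
    model: str = "gpt-4"
    llm_supports_vision: bool = False
    llm_supports_functions: bool = False
    context_window: int = 2048
    max_tokens: int = 4096
    temperature: float = 0.8
    tts_service: str = "elevenlabs"
    stt_service: str = "openai"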
@@ -1,6 +1,6 @@
 from interpreter import interpreter
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
 interpreter.tts = "coqui"
 