update docs and remove comments

parent d162ee69a3 · commit 564255adee
@@ -127,7 +127,9 @@ If you want to run local speech-to-text using Whisper, you must install Rust. Fo
 
 ## Customizations
 
-To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in `i.py`. This file sets up an interpreter, and is powered by Open Interpreter.
+To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in the `profiles` directory under the `server` directory. This file sets up an interpreter, and is powered by Open Interpreter.
+
+To specify the text-to-speech service for the 01 `base_device.py`, set `interpreter.tts` to either "openai" for OpenAI, "elevenlabs" for ElevenLabs, or "coqui" for Coqui (local) in a profile.
 
 ## Ubuntu Dependencies
 
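For illustration, a minimal profile module might look like the sketch below. The file name is hypothetical; the attributes shown (`interpreter.tts`, `interpreter.llm.model`) are the ones that appear in the profiles touched by this commit.

# server/profiles/my_profile.py  (hypothetical file name)
from interpreter import interpreter

# TTS provider: "openai" (OpenAI), "elevenlabs" (ElevenLabs), or "coqui" (local)
interpreter.tts = "openai"

# Language model served by this profile
interpreter.llm.model = "gpt-4-turbo"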
@@ -91,7 +91,6 @@ class Device:
         self.server_url = ""
         self.ctrl_pressed = False
         self.tts_service = ""
-        self.playback_latency = None
 
     def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
         """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@@ -165,10 +164,6 @@ class Device:
         while True:
             try:
                 audio = await self.audiosegments.get()
-                if self.playback_latency and isinstance(audio, bytes):
-                    elapsed_time = time.time() - self.playback_latency
-                    print(f"Time from request to playback: {elapsed_time} seconds")
-                    self.playback_latency = None
 
                 if self.tts_service == "elevenlabs":
                     mpv_process.stdin.write(audio)  # type: ignore
@@ -224,7 +219,6 @@ class Device:
         stream.stop_stream()
         stream.close()
         print("Recording stopped.")
-        self.playback_latency = time.time()
 
         duration = wav_file.getnframes() / RATE
         if duration < 0.3:
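The Device hunks above strip the ad-hoc playback-latency instrumentation (the `self.playback_latency` attribute, the timestamp set when recording stops, and the elapsed-time print). If the measurement is ever needed again, one option is to keep it behind an opt-in flag rather than deleting it; a minimal sketch, not part of this commit (the environment-variable name is hypothetical):

import os
import time

DEBUG_LATENCY = os.getenv("O1_DEBUG_LATENCY") == "1"  # hypothetical opt-in flag

class LatencyTimer:
    """Measures time from request to playback; prints only when enabled."""

    def __init__(self):
        self.start = None

    def mark_request(self):
        if DEBUG_LATENCY:
            self.start = time.time()

    def mark_playback(self):
        if DEBUG_LATENCY and self.start is not None:
            print(f"Time from request to playback: {time.time() - self.start} seconds")
            self.start = None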
@@ -22,11 +22,6 @@ import os
 
 class AsyncInterpreter:
     def __init__(self, interpreter):
-        self.stt_latency = None
-        self.tts_latency = None
-        self.interpreter_latency = None
-        self.time_from_first_yield_to_first_put = None
-
         self.interpreter = interpreter
 
         # STT
@@ -128,9 +123,7 @@ class AsyncInterpreter:
 
                 # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
                 # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
-                print("yielding ", content)
-                if self.time_from_first_yield_to_first_put is None:
-                    self.time_from_first_yield_to_first_put = time.time()
+                # print("yielding ", content)
 
                 yield content
 
@@ -162,9 +155,6 @@ class AsyncInterpreter:
         )
 
         # Send a completion signal
-        end_interpreter = time.time()
-        self.interpreter_latency = end_interpreter - start_interpreter
-        print("INTERPRETER LATENCY", self.interpreter_latency)
         # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
 
     async def run(self):
@@ -179,11 +169,7 @@ class AsyncInterpreter:
         while not self._input_queue.empty():
             input_queue.append(self._input_queue.get())
 
-        start_stt = time.time()
         message = self.stt.text()
-        end_stt = time.time()
-        self.stt_latency = end_stt - start_stt
-        print("STT LATENCY", self.stt_latency)
 
         print(message)
 
@@ -210,23 +196,11 @@ class AsyncInterpreter:
                     "end": True,
                 }
             )
-            end_tts = time.time()
-            self.tts_latency = end_tts - self.tts.stream_start_time
-            print("TTS LATENCY", self.tts_latency)
             self.tts.stop()
             break
 
     async def _on_tts_chunk_async(self, chunk):
-        print("adding chunk to queue")
-        if (
-            self.time_from_first_yield_to_first_put is not None
-            and self.time_from_first_yield_to_first_put != 0
-        ):
-            print(
-                "time from first yield to first put is ",
-                time.time() - self.time_from_first_yield_to_first_put,
-            )
-            self.time_from_first_yield_to_first_put = 0
+        # print("adding chunk to queue")
         await self._add_to_queue(self._output_queue, chunk)
 
     def on_tts_chunk(self, chunk):
@@ -234,8 +208,5 @@ class AsyncInterpreter:
         asyncio.run(self._on_tts_chunk_async(chunk))
 
     async def output(self):
-        print("outputting chunks")
+        # print("outputting chunks")
         return await self._output_queue.get()
-
-    def shutdown(self):
-        self.stt.shutdown()
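The AsyncInterpreter hunks above delete the STT/TTS/interpreter latency bookkeeping and comment out the remaining debug prints. As an alternative to commenting prints out by hand, debug output can be routed through the standard `logging` module and silenced by default; a sketch under that assumption (the logger name is hypothetical):

import logging

logger = logging.getLogger("01.server")  # hypothetical logger name

def debug_yield(content: str) -> None:
    # Emitted only when logging is configured at DEBUG level,
    # e.g. logging.basicConfig(level=logging.DEBUG)
    logger.debug("yielding %s", content)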
@@ -1,9 +1,13 @@
-# TODO: import from the profiles directory the interpreter that should be served!!
+# import from the profiles directory the interpreter to be served
 
-from .profiles.fast import interpreter as base_interpreter
+# add other profiles to the directory to define other interpreter instances and import them here
+# {.profiles.fast: optimizes for STT/TTS latency with the fastest models }
+# {.profiles.local: uses local models and local STT/TTS }
+# {.profiles.default: uses default interpreter settings with optimized TTS latency }
 
+# from .profiles.fast import interpreter as base_interpreter
 # from .profiles.local import interpreter as base_interpreter
-# from .profiles.default import interpreter as base_interpreter
+from .profiles.default import interpreter as base_interpreter
 
 import asyncio
 import traceback
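The new comments tell readers to add profile modules to the directory and swap the import line by hand. A runtime alternative, purely illustrative and not part of this commit, would be to resolve the profile by name with `importlib`, assuming each profile module exposes an `interpreter` attribute as the ones in this diff do:

import importlib

def load_profile(name: str = "default"):
    """Return the interpreter defined by profiles/<name>.py (hypothetical helper)."""
    module = importlib.import_module(f".profiles.{name}", package=__package__)
    return module.interpreter

base_interpreter = load_profile("default")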
@@ -1,3 +1,5 @@
+# tests currently hang after completion
+
 """
 import pytest
 import signal
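The added comment records that the tests currently hang after completion. Until the hang itself is fixed, a common mitigation is the third-party pytest-timeout plugin, which fails a test instead of letting it block forever; a sketch, assuming the plugin is installed (pip install pytest-timeout, run with pytest --timeout=60):

import pytest

@pytest.mark.timeout(60)  # fail after 60 seconds instead of hanging
def test_server_smoke():
    # hypothetical placeholder test body
    assert True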
@@ -3,9 +3,9 @@ from interpreter import interpreter
 # This is an Open Interpreter compatible profile.
 # Visit https://01.openinterpreter.com/profile for all options.
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "openai"
+interpreter.tts = "elevenlabs"
 
 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"
@@ -3,7 +3,7 @@ from interpreter import interpreter
 # This is an Open Interpreter compatible profile.
 # Visit https://01.openinterpreter.com/profile for all options.
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
 interpreter.tts = "elevenlabs"
 
@@ -16,27 +16,9 @@ interpreter.llm.context_window = 2048
 interpreter.llm.max_tokens = 4096
 interpreter.llm.temperature = 0.8
 
-# interpreter.llm.api_key = os.environ["GROQ_API_KEY"]
-
 interpreter.computer.import_computer_api = False
 
 interpreter.auto_run = True
 interpreter.system_message = (
     "You are a helpful assistant that can answer questions and help with tasks."
 )
-
-# TODO: include other options in comments in the profiles for tts
-# direct people to the profiles directory to make changes to the interpreter profile
-# this should be made explicit on the docs
-
-"""
-llm_service: str = "litellm",
-model: str = "gpt-4",
-llm_supports_vision: bool = False,
-llm_supports_functions: bool = False,
-context_window: int = 2048,
-max_tokens: int = 4096,
-temperature: float = 0.8,
-tts_service: str = "elevenlabs",
-stt_service: str = "openai",
-"""
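The hunk above deletes a dead docstring that listed the old constructor defaults. For reference, those same defaults could be kept as a typed record rather than as dead comments; a hypothetical sketch, not part of this commit:

from dataclasses import dataclass

@dataclass
class ProfileDefaults:
    # values copied from the removed docstring
    llm_service: str = "litellm"
    model: str = "gpt-4"
    llm_supports_vision: bool = False
    llm_supports_functions: bool = False
    context_window: int = 2048
    max_tokens: int = 4096
    temperature: float = 0.8
    tts_service: str = "elevenlabs"
    stt_service: str = "openai"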
@@ -1,6 +1,6 @@
 from interpreter import interpreter
 
-# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
+# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
 interpreter.tts = "coqui"
 