add async-interpreter
This commit is contained in:
parent
c35d4c08f4
commit
10681b552f
File diff suppressed because one or more lines are too long
|
@ -28,12 +28,23 @@ psutil = "^5.9.8"
|
||||||
typer = "^0.9.0"
|
typer = "^0.9.0"
|
||||||
platformdirs = "^4.2.0"
|
platformdirs = "^4.2.0"
|
||||||
rich = "^13.7.1"
|
rich = "^13.7.1"
|
||||||
open-interpreter = {extras = ["os"], version = "^0.2.4"}
|
|
||||||
dateparser = "^1.2.0"
|
|
||||||
pytimeparse = "^1.1.8"
|
pytimeparse = "^1.1.8"
|
||||||
python-crontab = "^3.0.0"
|
python-crontab = "^3.0.0"
|
||||||
inquirer = "^3.2.4"
|
inquirer = "^3.2.4"
|
||||||
pyqrcode = "^1.2.1"
|
pyqrcode = "^1.2.1"
|
||||||
|
realtimestt = "^0.1.12"
|
||||||
|
realtimetts = "^0.3.44"
|
||||||
|
keyboard = "^0.13.5"
|
||||||
|
pyautogui = "^0.9.54"
|
||||||
|
ctranslate2 = "4.1.0"
|
||||||
|
py3-tts = "^3.5"
|
||||||
|
elevenlabs = "0.2.27"
|
||||||
|
groq = "^0.5.0"
|
||||||
|
open-interpreter = "^0.2.5"
|
||||||
|
litellm = "1.35.35"
|
||||||
|
openai = "1.13.3"
|
||||||
|
pywebview = "*"
|
||||||
|
pyobjc = "*"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
|
|
|
@ -60,12 +60,18 @@ CAMERA_WARMUP_SECONDS = float(os.getenv("CAMERA_WARMUP_SECONDS", 0))
|
||||||
# Specify OS
|
# Specify OS
|
||||||
current_platform = get_system_info()
|
current_platform = get_system_info()
|
||||||
|
|
||||||
|
|
||||||
def is_win11():
|
def is_win11():
|
||||||
return sys.getwindowsversion().build >= 22000
|
return sys.getwindowsversion().build >= 22000
|
||||||
|
|
||||||
|
|
||||||
def is_win10():
|
def is_win10():
|
||||||
try:
|
try:
|
||||||
return platform.system() == "Windows" and "10" in platform.version() and not is_win11()
|
return (
|
||||||
|
platform.system() == "Windows"
|
||||||
|
and "10" in platform.version()
|
||||||
|
and not is_win11()
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -267,19 +273,18 @@ class Device:
|
||||||
def on_press(self, key):
|
def on_press(self, key):
|
||||||
"""Detect spacebar press and Ctrl+C combination."""
|
"""Detect spacebar press and Ctrl+C combination."""
|
||||||
self.pressed_keys.add(key) # Add the pressed key to the set
|
self.pressed_keys.add(key) # Add the pressed key to the set
|
||||||
|
|
||||||
|
|
||||||
if keyboard.Key.space in self.pressed_keys:
|
if keyboard.Key.space in self.pressed_keys:
|
||||||
self.toggle_recording(True)
|
self.toggle_recording(True)
|
||||||
elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
|
elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char("c")} <= self.pressed_keys:
|
||||||
logger.info("Ctrl+C pressed. Exiting...")
|
logger.info("Ctrl+C pressed. Exiting...")
|
||||||
kill_process_tree()
|
kill_process_tree()
|
||||||
os._exit(0)
|
os._exit(0)
|
||||||
|
|
||||||
# Windows alternative to the above
|
# Windows alternative to the above
|
||||||
if key == keyboard.Key.ctrl_l:
|
if key == keyboard.Key.ctrl_l:
|
||||||
self.ctrl_pressed = True
|
self.ctrl_pressed = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if key.vk == 67 and self.ctrl_pressed:
|
if key.vk == 67 and self.ctrl_pressed:
|
||||||
logger.info("Ctrl+C pressed. Exiting...")
|
logger.info("Ctrl+C pressed. Exiting...")
|
||||||
|
@ -289,17 +294,17 @@ class Device:
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def on_release(self, key):
|
def on_release(self, key):
|
||||||
"""Detect spacebar release and 'c' key press for camera, and handle key release."""
|
"""Detect spacebar release and 'c' key press for camera, and handle key release."""
|
||||||
self.pressed_keys.discard(key) # Remove the released key from the key press tracking set
|
self.pressed_keys.discard(
|
||||||
|
key
|
||||||
|
) # Remove the released key from the key press tracking set
|
||||||
|
|
||||||
if key == keyboard.Key.ctrl_l:
|
if key == keyboard.Key.ctrl_l:
|
||||||
self.ctrl_pressed = False
|
self.ctrl_pressed = False
|
||||||
if key == keyboard.Key.space:
|
if key == keyboard.Key.space:
|
||||||
self.toggle_recording(False)
|
self.toggle_recording(False)
|
||||||
elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
|
elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char("c"):
|
||||||
self.fetch_image_from_camera()
|
self.fetch_image_from_camera()
|
||||||
|
|
||||||
async def message_sender(self, websocket):
|
async def message_sender(self, websocket):
|
||||||
|
@ -307,14 +312,18 @@ class Device:
|
||||||
message = await asyncio.get_event_loop().run_in_executor(
|
message = await asyncio.get_event_loop().run_in_executor(
|
||||||
None, send_queue.get
|
None, send_queue.get
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(message, bytes):
|
if isinstance(message, bytes):
|
||||||
await websocket.send(message)
|
await websocket.send(message)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
await websocket.send(json.dumps(message))
|
await websocket.send(json.dumps(message))
|
||||||
|
|
||||||
send_queue.task_done()
|
send_queue.task_done()
|
||||||
await asyncio.sleep(0.01)
|
await asyncio.sleep(0.01)
|
||||||
|
|
||||||
async def websocket_communication(self, WS_URL):
|
async def websocket_communication(self, WS_URL):
|
||||||
|
print("websocket communication was called!!!!")
|
||||||
show_connection_log = True
|
show_connection_log = True
|
||||||
|
|
||||||
async def exec_ws_communication(websocket):
|
async def exec_ws_communication(websocket):
|
||||||
|
@ -331,8 +340,8 @@ class Device:
|
||||||
await asyncio.sleep(0.01)
|
await asyncio.sleep(0.01)
|
||||||
chunk = await websocket.recv()
|
chunk = await websocket.recv()
|
||||||
|
|
||||||
logger.debug(f"Got this message from the server: {type(chunk)} {chunk}")
|
# logger.debug(f"Got this message from the server: {type(chunk)} {chunk}")
|
||||||
|
print((f"Got this message from the server: {type(chunk)} {chunk}"))
|
||||||
if type(chunk) == str:
|
if type(chunk) == str:
|
||||||
chunk = json.loads(chunk)
|
chunk = json.loads(chunk)
|
||||||
|
|
||||||
|
@ -369,7 +378,7 @@ class Device:
|
||||||
code = message["content"]
|
code = message["content"]
|
||||||
result = interpreter.computer.run(language, code)
|
result = interpreter.computer.run(language, code)
|
||||||
send_queue.put(result)
|
send_queue.put(result)
|
||||||
|
|
||||||
if is_win10():
|
if is_win10():
|
||||||
logger.info("Windows 10 detected")
|
logger.info("Windows 10 detected")
|
||||||
# Workaround for Windows 10 not latching to the websocket server.
|
# Workaround for Windows 10 not latching to the websocket server.
|
||||||
|
@ -380,20 +389,22 @@ class Device:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while attempting to connect: {e}")
|
logger.error(f"Error while attempting to connect: {e}")
|
||||||
else:
|
else:
|
||||||
|
print("websocket url is", WS_URL)
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
async with websockets.connect(WS_URL) as websocket:
|
async with websockets.connect(WS_URL) as websocket:
|
||||||
await exec_ws_communication(websocket)
|
await exec_ws_communication(websocket)
|
||||||
except:
|
except:
|
||||||
logger.debug(traceback.format_exc())
|
logger.info(traceback.format_exc())
|
||||||
if show_connection_log:
|
if show_connection_log:
|
||||||
logger.info(f"Connecting to `{WS_URL}`...")
|
logger.info(f"Connecting to `{WS_URL}`...")
|
||||||
show_connection_log = False
|
show_connection_log = False
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
async def start_async(self):
|
async def start_async(self):
|
||||||
|
print("start async was called!!!!!")
|
||||||
# Configuration for WebSocket
|
# Configuration for WebSocket
|
||||||
WS_URL = f"ws://{self.server_url}"
|
WS_URL = f"ws://{self.server_url}/ws"
|
||||||
# Start the WebSocket communication
|
# Start the WebSocket communication
|
||||||
asyncio.create_task(self.websocket_communication(WS_URL))
|
asyncio.create_task(self.websocket_communication(WS_URL))
|
||||||
|
|
||||||
|
@ -430,8 +441,10 @@ class Device:
|
||||||
on_press=self.on_press, on_release=self.on_release
|
on_press=self.on_press, on_release=self.on_release
|
||||||
)
|
)
|
||||||
listener.start()
|
listener.start()
|
||||||
|
print("listener for keyboard started!!!!!")
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
print("device was started!!!!!!")
|
||||||
if os.getenv("TEACH_MODE") != "True":
|
if os.getenv("TEACH_MODE") != "True":
|
||||||
asyncio.run(self.start_async())
|
asyncio.run(self.start_async())
|
||||||
p.terminate()
|
p.terminate()
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
import asyncio
|
||||||
|
import traceback
|
||||||
|
import json
|
||||||
|
from fastapi import FastAPI, WebSocket, Header
|
||||||
|
from uvicorn import Config, Server
|
||||||
|
from interpreter import interpreter as base_interpreter
|
||||||
|
from .async_interpreter import AsyncInterpreter
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from openai import OpenAI
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["STT_RUNNER"] = "server"
|
||||||
|
os.environ["TTS_RUNNER"] = "server"
|
||||||
|
|
||||||
|
# Parse command line arguments for port number
|
||||||
|
parser = argparse.ArgumentParser(description="FastAPI server.")
|
||||||
|
parser.add_argument("--port", type=int, default=8000, help="Port to run on.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
base_interpreter.tts = "openai"
|
||||||
|
base_interpreter.llm.model = "gpt-4-turbo"
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
interpreter = AsyncInterpreter(base_interpreter)
|
||||||
|
|
||||||
|
app = FastAPI()
|
||||||
|
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=["*"],
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"], # Allow all methods (GET, POST, etc.)
|
||||||
|
allow_headers=["*"], # Allow all headers
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.post("/load_chat")
|
||||||
|
async def load_chat(messages: List[Dict[str, Any]]):
|
||||||
|
interpreter.interpreter.messages = messages
|
||||||
|
interpreter.active_chat_messages = messages
|
||||||
|
print("🪼🪼🪼🪼🪼🪼 Messages loaded: ", interpreter.active_chat_messages)
|
||||||
|
return {"status": "success"}
|
||||||
|
|
||||||
|
@app.websocket("/ws")
|
||||||
|
async def websocket_endpoint(websocket: WebSocket):
|
||||||
|
await websocket.accept()
|
||||||
|
try:
|
||||||
|
|
||||||
|
async def receive_input():
|
||||||
|
while True:
|
||||||
|
data = await websocket.receive()
|
||||||
|
|
||||||
|
if isinstance(data, bytes):
|
||||||
|
await interpreter.input(data)
|
||||||
|
elif "bytes" in data:
|
||||||
|
await interpreter.input(data["bytes"])
|
||||||
|
print("SERVER FEEDING AUDIO")
|
||||||
|
elif "text" in data:
|
||||||
|
print("RECEIVED INPUT", data)
|
||||||
|
await interpreter.input(data["text"])
|
||||||
|
|
||||||
|
async def send_output():
|
||||||
|
while True:
|
||||||
|
output = await interpreter.output()
|
||||||
|
if isinstance(output, bytes):
|
||||||
|
await websocket.send_bytes(output)
|
||||||
|
# we dont send out bytes rn, no TTS
|
||||||
|
pass
|
||||||
|
elif isinstance(output, dict):
|
||||||
|
await websocket.send_text(json.dumps(output))
|
||||||
|
|
||||||
|
await asyncio.gather(receive_input(), send_output())
|
||||||
|
except Exception as e:
|
||||||
|
print(f"WebSocket connection closed with exception: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
|
finally:
|
||||||
|
await websocket.close()
|
||||||
|
|
||||||
|
config = Config(app, host="0.0.0.0", port=8000, lifespan="on")
|
||||||
|
server = Server(config)
|
||||||
|
await server.serve()
|
||||||
|
|
||||||
|
class Rename(BaseModel):
|
||||||
|
input: str
|
||||||
|
|
||||||
|
@app.post("/rename-chat")
|
||||||
|
async def rename_chat(body_content: Rename, x_api_key: str = Header(None)):
|
||||||
|
print("RENAME CHAT REQUEST in PY 🌙🌙🌙🌙")
|
||||||
|
input_value = body_content.input
|
||||||
|
client = OpenAI(
|
||||||
|
# defaults to os.environ.get("OPENAI_API_KEY")
|
||||||
|
api_key=x_api_key,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Given the following chat snippet, create a unique and descriptive title in less than 8 words. Your answer must not be related to customer service.\n\n{input_value}",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
temperature=0.3,
|
||||||
|
stream=False,
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
completion = response["choices"][0]["message"]["content"]
|
||||||
|
return {"data": {"content": completion}}
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
|
@ -0,0 +1,211 @@
|
||||||
|
# This is a websocket interpreter, TTS and STT disabled.
|
||||||
|
# It makes a websocket on port 8000 that sends/recieves LMC messages in *streaming* format.
|
||||||
|
|
||||||
|
### You MUST send a start and end flag with each message! For example: ###
|
||||||
|
|
||||||
|
"""
|
||||||
|
{"role": "user", "type": "message", "start": True})
|
||||||
|
{"role": "user", "type": "message", "content": "hi"})
|
||||||
|
{"role": "user", "type": "message", "end": True})
|
||||||
|
"""
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
from pynput import keyboard
|
||||||
|
from RealtimeTTS import TextToAudioStream, OpenAIEngine, CoquiEngine
|
||||||
|
from RealtimeSTT import AudioToTextRecorder
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncInterpreter:
|
||||||
|
def __init__(self, interpreter):
|
||||||
|
self.interpreter = interpreter
|
||||||
|
|
||||||
|
# STT
|
||||||
|
self.stt = AudioToTextRecorder(use_microphone=False)
|
||||||
|
self.stt.stop() # It needs this for some reason
|
||||||
|
|
||||||
|
# TTS
|
||||||
|
if self.interpreter.tts == "coqui":
|
||||||
|
engine = CoquiEngine()
|
||||||
|
elif self.interpreter.tts == "openai":
|
||||||
|
engine = OpenAIEngine()
|
||||||
|
self.tts = TextToAudioStream(engine)
|
||||||
|
|
||||||
|
self.active_chat_messages = []
|
||||||
|
|
||||||
|
self._input_queue = asyncio.Queue() # Queue that .input will shove things into
|
||||||
|
self._output_queue = asyncio.Queue() # Queue to put output chunks into
|
||||||
|
self._last_lmc_start_flag = None # Unix time of last LMC start flag recieved
|
||||||
|
self._in_keyboard_write_block = (
|
||||||
|
False # Tracks whether interpreter is trying to use the keyboard
|
||||||
|
)
|
||||||
|
self.loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
|
async def _add_to_queue(self, queue, item):
|
||||||
|
await queue.put(item)
|
||||||
|
|
||||||
|
async def clear_queue(self, queue):
|
||||||
|
while not queue.empty():
|
||||||
|
await queue.get()
|
||||||
|
|
||||||
|
async def clear_input_queue(self):
|
||||||
|
await self.clear_queue(self._input_queue)
|
||||||
|
|
||||||
|
async def clear_output_queue(self):
|
||||||
|
await self.clear_queue(self._output_queue)
|
||||||
|
|
||||||
|
async def input(self, chunk):
|
||||||
|
"""
|
||||||
|
Expects a chunk in streaming LMC format.
|
||||||
|
"""
|
||||||
|
if isinstance(chunk, bytes):
|
||||||
|
# It's probably a chunk of audio
|
||||||
|
self.stt.feed_audio(chunk)
|
||||||
|
print("INTERPRETER FEEDING AUDIO")
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
try:
|
||||||
|
chunk = json.loads(chunk)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if "start" in chunk:
|
||||||
|
print("input received")
|
||||||
|
self.stt.start()
|
||||||
|
self._last_lmc_start_flag = time.time()
|
||||||
|
# self.interpreter.computer.terminal.stop() # Stop any code execution... maybe we should make interpreter.stop()?
|
||||||
|
elif "end" in chunk:
|
||||||
|
print("running oi on input now")
|
||||||
|
asyncio.create_task(self.run())
|
||||||
|
else:
|
||||||
|
await self._add_to_queue(self._input_queue, chunk)
|
||||||
|
|
||||||
|
def add_to_output_queue_sync(self, chunk):
|
||||||
|
"""
|
||||||
|
Synchronous function to add a chunk to the output queue.
|
||||||
|
"""
|
||||||
|
print("ADDING TO QUEUE:", chunk)
|
||||||
|
asyncio.create_task(self._add_to_queue(self._output_queue, chunk))
|
||||||
|
|
||||||
|
async def run(self):
|
||||||
|
"""
|
||||||
|
Runs OI on the audio bytes submitted to the input. Will add streaming LMC chunks to the _output_queue.
|
||||||
|
"""
|
||||||
|
self.interpreter.messages = self.active_chat_messages
|
||||||
|
|
||||||
|
# self.beeper.start()
|
||||||
|
|
||||||
|
self.stt.stop()
|
||||||
|
# message = self.stt.text()
|
||||||
|
# print("THE MESSAGE:", message)
|
||||||
|
|
||||||
|
# accumulates the input queue message
|
||||||
|
input_queue = []
|
||||||
|
while not self._input_queue.empty():
|
||||||
|
input_queue.append(self._input_queue.get())
|
||||||
|
|
||||||
|
print("INPUT QUEUE:", input_queue)
|
||||||
|
# message = [i for i in input_queue if i["type"] == "message"][0]["content"]
|
||||||
|
# message = self.stt.text()
|
||||||
|
|
||||||
|
message = "hello"
|
||||||
|
print(message)
|
||||||
|
|
||||||
|
# print(message)
|
||||||
|
def generate(message):
|
||||||
|
last_lmc_start_flag = self._last_lmc_start_flag
|
||||||
|
self.interpreter.messages = self.active_chat_messages
|
||||||
|
print(
|
||||||
|
"🍀🍀🍀🍀GENERATING, using these messages: ", self.interpreter.messages
|
||||||
|
)
|
||||||
|
print("🍀 🍀 🍀 🍀 active_chat_messages: ", self.active_chat_messages)
|
||||||
|
print("message is", message)
|
||||||
|
|
||||||
|
for chunk in self.interpreter.chat(message, display=True, stream=True):
|
||||||
|
|
||||||
|
if self._last_lmc_start_flag != last_lmc_start_flag:
|
||||||
|
# self.beeper.stop()
|
||||||
|
break
|
||||||
|
|
||||||
|
# self.add_to_output_queue_sync(chunk) # To send text, not just audio
|
||||||
|
|
||||||
|
content = chunk.get("content")
|
||||||
|
|
||||||
|
# Handle message blocks
|
||||||
|
if chunk.get("type") == "message":
|
||||||
|
if content:
|
||||||
|
# self.beeper.stop()
|
||||||
|
|
||||||
|
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
|
||||||
|
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
|
||||||
|
|
||||||
|
yield content
|
||||||
|
|
||||||
|
# Handle code blocks
|
||||||
|
elif chunk.get("type") == "code":
|
||||||
|
if "start" in chunk:
|
||||||
|
# self.beeper.start()
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Experimental: If the AI wants to type, we should type immediatly
|
||||||
|
if (
|
||||||
|
self.interpreter.messages[-1]
|
||||||
|
.get("content", "")
|
||||||
|
.startswith("computer.keyboard.write(")
|
||||||
|
):
|
||||||
|
keyboard.controller.type(content)
|
||||||
|
self._in_keyboard_write_block = True
|
||||||
|
if "end" in chunk and self._in_keyboard_write_block:
|
||||||
|
self._in_keyboard_write_block = False
|
||||||
|
# (This will make it so it doesn't type twice when the block executes)
|
||||||
|
if self.interpreter.messages[-1]["content"].startswith(
|
||||||
|
"computer.keyboard.write("
|
||||||
|
):
|
||||||
|
self.interpreter.messages[-1]["content"] = (
|
||||||
|
"dummy_variable = ("
|
||||||
|
+ self.interpreter.messages[-1]["content"][
|
||||||
|
len("computer.keyboard.write(") :
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Send a completion signal
|
||||||
|
|
||||||
|
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
|
||||||
|
|
||||||
|
# Feed generate to RealtimeTTS
|
||||||
|
self.add_to_output_queue_sync(
|
||||||
|
{"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
||||||
|
)
|
||||||
|
self.tts.feed(generate(message))
|
||||||
|
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
|
||||||
|
while True:
|
||||||
|
if self.tts.is_playing():
|
||||||
|
break
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
while True:
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
print("is_playing", self.tts.is_playing())
|
||||||
|
if not self.tts.is_playing():
|
||||||
|
self.add_to_output_queue_sync(
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"type": "audio",
|
||||||
|
"format": "bytes.wav",
|
||||||
|
"end": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
async def _on_tts_chunk_async(self, chunk):
|
||||||
|
print("SENDING TTS CHUNK")
|
||||||
|
await self._add_to_queue(self._output_queue, chunk)
|
||||||
|
|
||||||
|
def on_tts_chunk(self, chunk):
|
||||||
|
asyncio.run(self._on_tts_chunk_async(chunk))
|
||||||
|
|
||||||
|
async def output(self):
|
||||||
|
return await self._output_queue.get()
|
|
@ -5,7 +5,7 @@ import threading
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
from source.server.tunnel import create_tunnel
|
from source.server.tunnel import create_tunnel
|
||||||
from source.server.server import main
|
from source.server.ai_server import main
|
||||||
from source.server.utils.local_mode import select_local_model
|
from source.server.utils.local_mode import select_local_model
|
||||||
|
|
||||||
import signal
|
import signal
|
||||||
|
@ -22,7 +22,7 @@ def run(
|
||||||
help="Specify the server host where the server will deploy",
|
help="Specify the server host where the server will deploy",
|
||||||
),
|
),
|
||||||
server_port: int = typer.Option(
|
server_port: int = typer.Option(
|
||||||
10001,
|
8000,
|
||||||
"--server-port",
|
"--server-port",
|
||||||
help="Specify the server port where the server will deploy",
|
help="Specify the server port where the server will deploy",
|
||||||
),
|
),
|
||||||
|
@ -103,7 +103,7 @@ def run(
|
||||||
def _run(
|
def _run(
|
||||||
server: bool = False,
|
server: bool = False,
|
||||||
server_host: str = "0.0.0.0",
|
server_host: str = "0.0.0.0",
|
||||||
server_port: int = 10001,
|
server_port: int = 8000,
|
||||||
tunnel_service: str = "bore",
|
tunnel_service: str = "bore",
|
||||||
expose: bool = False,
|
expose: bool = False,
|
||||||
client: bool = False,
|
client: bool = False,
|
||||||
|
@ -127,7 +127,7 @@ def _run(
|
||||||
# llm_service = "llamafile"
|
# llm_service = "llamafile"
|
||||||
stt_service = "local-whisper"
|
stt_service = "local-whisper"
|
||||||
select_local_model()
|
select_local_model()
|
||||||
|
|
||||||
system_type = platform.system()
|
system_type = platform.system()
|
||||||
if system_type == "Windows":
|
if system_type == "Windows":
|
||||||
server_host = "localhost"
|
server_host = "localhost"
|
||||||
|
@ -138,8 +138,6 @@ def _run(
|
||||||
if not server and not client:
|
if not server and not client:
|
||||||
server = True
|
server = True
|
||||||
client = True
|
client = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def handle_exit(signum, frame):
|
def handle_exit(signum, frame):
|
||||||
os._exit(0)
|
os._exit(0)
|
||||||
|
@ -154,18 +152,18 @@ def _run(
|
||||||
target=loop.run_until_complete,
|
target=loop.run_until_complete,
|
||||||
args=(
|
args=(
|
||||||
main(
|
main(
|
||||||
server_host,
|
# server_host,
|
||||||
server_port,
|
# server_port,
|
||||||
llm_service,
|
# llm_service,
|
||||||
model,
|
# model,
|
||||||
llm_supports_vision,
|
# llm_supports_vision,
|
||||||
llm_supports_functions,
|
# llm_supports_functions,
|
||||||
context_window,
|
# context_window,
|
||||||
max_tokens,
|
# max_tokens,
|
||||||
temperature,
|
# temperature,
|
||||||
tts_service,
|
# tts_service,
|
||||||
stt_service,
|
# stt_service,
|
||||||
mobile,
|
# mobile,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
@ -182,6 +180,7 @@ def _run(
|
||||||
system_type = platform.system()
|
system_type = platform.system()
|
||||||
if system_type == "Darwin": # Mac OS
|
if system_type == "Darwin": # Mac OS
|
||||||
client_type = "mac"
|
client_type = "mac"
|
||||||
|
print("initiating mac device with base device!!!")
|
||||||
elif system_type == "Windows": # Windows System
|
elif system_type == "Windows": # Windows System
|
||||||
client_type = "windows"
|
client_type = "windows"
|
||||||
elif system_type == "Linux": # Linux System
|
elif system_type == "Linux": # Linux System
|
||||||
|
@ -197,7 +196,9 @@ def _run(
|
||||||
module = importlib.import_module(
|
module = importlib.import_module(
|
||||||
f".clients.{client_type}.device", package="source"
|
f".clients.{client_type}.device", package="source"
|
||||||
)
|
)
|
||||||
|
server_url = "0.0.0.0:8000"
|
||||||
client_thread = threading.Thread(target=module.main, args=[server_url])
|
client_thread = threading.Thread(target=module.main, args=[server_url])
|
||||||
|
print("client thread started")
|
||||||
client_thread.start()
|
client_thread.start()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in New Issue