8th Architecture

parent e75fa90a48
commit 47b31fb5e1
@@ -1 +1 @@
-[{"role": "user", "type": "message", "content": "\ub2e4\uc74c \uc601\uc0c1\uc5d0\uc11c \ub9cc\ub098\uc694!\n"}]
+[{"role": "user", "type": "message", "content": "Yeah, it's explaining why you have to be a paramedic.\n"}, {"role": "user", "type": "message", "content": "\uc5b4\ub9b4\ub54c \uad1c\ucc2e\uc558\ub294\ub370 \uc544 \uadf8\ub798\uc11c \uc544\uce68\uc5d0 \uc9c4\uc9dc \uc548\uac00\uc9c0\uace0 \uc654\ub098\ubd10\uc694 \uc57c \ub098 \uc6ec\ub9cc\ud07c \ub9db\uc788\ub294\ub370\n"}, {"role": "user", "type": "message", "content": "Like, you'd have to go, like, out of houses.\n"}]
@@ -6,6 +6,7 @@ from starlette.websockets import WebSocket
 from queue import Queue
 from pynput import keyboard
 import json
+import traceback
 import websockets
 import queue
 import pydub
@@ -13,11 +14,13 @@ import ast
 from pydub import AudioSegment
 from pydub.playback import play
 import io
+import time
 import wave
 import tempfile
 from datetime import datetime
-from utils.check_filtered_kernel import check_filtered_kernel
 from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
+from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue
+from stt import stt_wav
 
 # Configuration for Audio Recording
 CHUNK = 1024 # Record in chunks of 1024 samples
@@ -36,6 +39,16 @@ if not WS_URL:
 p = pyaudio.PyAudio()
 
 def record_audio():
+
+    if os.getenv('STT_RUNNER') == "server":
+        # STT will happen on the server. we're sending audio.
+        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "start": True})
+    elif os.getenv('STT_RUNNER') == "device":
+        # STT will happen here, on the device. we're sending text.
+        send_queue.put({"role": "user", "type": "message", "start": True})
+    else:
+        raise Exception("STT_RUNNER must be set to either 'device' or 'server'.")
+
     """Record audio from the microphone and add it to the queue."""
     stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
     print("Recording started...")
@@ -64,8 +77,20 @@ def record_audio():
     while byte_data:
         send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
         byte_data = audio_file.read(CHUNK)
 
-    send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
+    if os.getenv('STT_RUNNER') == "device":
+        text = stt_wav(wav_path)
+        send_queue.put({"role": "user", "type": "message", "content": text})
+
+    if os.getenv('STT_RUNNER') == "server":
+        # STT will happen on the server. we sent audio.
+        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
+    elif os.getenv('STT_RUNNER') == "device":
+        # STT will happen here, on the device. we sent text.
+        send_queue.put({"role": "user", "type": "message", "end": True})
+
+    if os.path.exists(wav_path):
+        os.remove(wav_path)
 
 
 def toggle_recording(state):
@@ -114,11 +139,13 @@ async def websocket_communication(WS_URL):
 
            async for message in websocket:
 
+               print(message)
+
                if "content" in message_so_far:
                    if any(message_so_far[key] != message[key] for key in message_so_far):
                        message_so_far = message
                else:
-                   message_so_far["content"] += message
+                   message_so_far["content"] += message["content"]
 
                if message["type"] == "audio" and "content" in message:
                    audio_bytes = bytes(ast.literal_eval(message["content"]))
@@ -139,22 +166,25 @@ async def websocket_communication(WS_URL):
                    code = message_so_far["content"]
                    result = interpreter.computer.run(language, code)
                    send_queue.put(result)
 
 
    except:
+       traceback.print_exc()
        print(f"Connecting to `{WS_URL}`...")
        await asyncio.sleep(2)
 
-def main():
-    # Start the WebSocket communication in a separate asyncio event loop
-    ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication(WS_URL)), daemon=True)
-    ws_thread.start()
-
-    # Keyboard listener for spacebar press/release
-    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
-        listener.join()
-
-    p.terminate()
-
 if __name__ == "__main__":
-    main()
+    async def main():
+        # Start the WebSocket communication
+        asyncio.create_task(websocket_communication(WS_URL))
+
+        # Start watching the kernel if it's your job to do that
+        if os.getenv('CODE_RUNNER') == "device":
+            asyncio.create_task(put_kernel_messages_into_queue(send_queue))
+
+        # Keyboard listener for spacebar press/release
+        listener = keyboard.Listener(on_press=on_press, on_release=on_release)
+        listener.start()
+
+    asyncio.run(main())
+    p.terminate()
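For reference, here is a minimal sketch of the start/content/end flag protocol the device now emits, and of how STT_RUNNER switches it between raw audio and transcribed text. The queue name, helper name, WAV path, and chunk size below are illustrative stand-ins, not part of the commit.

    # Illustrative sketch only: demo_queue and send_finished_recording are
    # hypothetical names; the commit's device code does this inside record_audio().
    import os
    from queue import Queue

    demo_queue = Queue()

    def send_finished_recording(wav_path: str):
        """Wrap one recording in start/content/end flags, mirroring the diff above."""
        if os.getenv("STT_RUNNER", "server") == "server":
            # Server transcribes: stream the raw WAV bytes in 1024-byte chunks.
            demo_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "start": True})
            with open(wav_path, "rb") as audio_file:
                byte_data = audio_file.read(1024)
                while byte_data:
                    demo_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
                    byte_data = audio_file.read(1024)
            demo_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
        else:
            # Device transcribes: only the text and its flags cross the wire.
            demo_queue.put({"role": "user", "type": "message", "start": True})
            demo_queue.put({"role": "user", "type": "message", "content": "transcribed text would go here"})
            demo_queue.put({"role": "user", "type": "message", "end": True})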
OS/01/server.py (126 lines changed)
@@ -4,19 +4,22 @@ import json
 import time
 import queue
 import os
+import traceback
 from queue import Queue
 from threading import Thread
+import threading
 import uvicorn
 import re
 from fastapi import FastAPI
 from threading import Thread
 from starlette.websockets import WebSocket
-from stt import stt
+from stt import stt_bytes
 from tts import tts
 from pathlib import Path
 import asyncio
-from i import configure_interpreter
 import urllib.parse
+from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue
+from i import configure_interpreter
 from interpreter import interpreter
 
 app = FastAPI()
@@ -30,10 +33,10 @@ def is_full_sentence(text):
 def split_into_sentences(text):
     return re.split(r'(?<=[.!?])\s+', text)
 
-# Global queues
-receive_queue = queue.Queue()
-send_queue = queue.Queue()
-recieve_computer_queue = queue.Queue() # Just for computer messages from the device
+# Queues
+from_computer = queue.Queue() # Just for computer messages from the device. Sync queue because interpreter.run is synchronous
+from_user = asyncio.Queue() # Just for user messages from the device.
+to_device = asyncio.Queue() # For messages we send.
 
 # Switch code executor to device if that's set
 
@ -56,14 +59,14 @@ if os.getenv('CODE_RUNNER') == "device":
|
||||||
|
|
||||||
# Unless it was just sent to the device, send it wrapped in flags
|
# Unless it was just sent to the device, send it wrapped in flags
|
||||||
if not (interpreter.messages and interpreter.messages[-1] == message):
|
if not (interpreter.messages and interpreter.messages[-1] == message):
|
||||||
send_queue.put({"role": "assistant", "type": "code", "format": "python", "start": True})
|
to_device.put({"role": "assistant", "type": "code", "format": "python", "start": True})
|
||||||
send_queue.put(message)
|
to_device.put(message)
|
||||||
send_queue.put({"role": "assistant", "type": "code", "format": "python", "end": True})
|
to_device.put({"role": "assistant", "type": "code", "format": "python", "end": True})
|
||||||
|
|
||||||
# Stream the response
|
# Stream the response
|
||||||
print("Waiting for the device to respond...")
|
print("Waiting for the device to respond...")
|
||||||
while True:
|
while True:
|
||||||
chunk = recieve_computer_queue.get()
|
chunk = from_computer.get()
|
||||||
print("Server recieved from device:", chunk)
|
print("Server recieved from device:", chunk)
|
||||||
if "end" in chunk:
|
if "end" in chunk:
|
||||||
break
|
break
|
||||||
|
@@ -87,47 +90,52 @@ async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
     receive_task = asyncio.create_task(receive_messages(websocket))
     send_task = asyncio.create_task(send_messages(websocket))
-    await asyncio.gather(receive_task, send_task)
+    try:
+        await asyncio.gather(receive_task, send_task)
+    except Exception as e:
+        traceback.print_exc()
+        print(f"Connection lost. Error: {e}")
 
 async def receive_messages(websocket: WebSocket):
     while True:
-        data = await websocket.receive_text()
-        if type(data) == dict and data["role"] == "computer":
-            recieve_computer_queue.put(data) # To be handled by interpreter.computer.run
+        data = await websocket.receive_json()
+        if data["role"] == "computer":
+            from_computer.put(data) # To be handled by interpreter.computer.run
+        elif data["role"] == "user":
+            await from_user.put(data)
         else:
-            receive_queue.put(data)
+            raise("Unknown role:", data)
 
 async def send_messages(websocket: WebSocket):
     while True:
-        message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
+        message = await to_device.get()
        print(message)
        await websocket.send_json(message)
 
-def queue_listener():
-    audio_file = bytearray()
+async def user_listener():
+    audio_bytes = bytearray()
     while True:
-        # Check 10x a second for new messages
-        while receive_queue.empty():
-            time.sleep(0.1)
-        message = receive_queue.get()
-
-        message = json.loads(message)
+        message = await from_user.get()
 
        # Hold the audio in a buffer. If it's ready (we got end flag, stt it)
        if message["type"] == "audio":
            if "content" in message:
-               audio_file.extend(bytes(ast.literal_eval(message["content"])))
+               audio_bytes.extend(bytes(ast.literal_eval(message["content"])))
            if "end" in message:
-               content = stt(audio_file, message["format"])
+               content = stt_bytes(audio_bytes, message["format"])
                if content == None: # If it was nothing / silence
                    continue
-               audio_file = bytearray()
+               audio_bytes = bytearray()
                message = {"role": "user", "type": "message", "content": content}
            else:
                continue
 
+       # Ignore flags, we only needed them for audio ^
+       if "content" not in message:
+           continue
+
        # Custom stop message will halt us
-       if message.get("content") and message.get("content").lower().strip(".,!") == "stop":
+       if message["content"].lower().strip(".,!") == "stop":
            continue
 
        # Load, append, and save conversation history
@@ -142,53 +150,59 @@ def queue_listener():
        for chunk in interpreter.chat(messages, stream=True):
 
            # Send it to the user
-           send_queue.put(chunk)
+           await to_device.put(chunk)
 
            # Speak full sentences out loud
            if chunk["role"] == "assistant" and "content" in chunk:
-               print("Chunk role is assistant and content is present in chunk.")
                accumulated_text += chunk["content"]
-               print("Accumulated text: ", accumulated_text)
                sentences = split_into_sentences(accumulated_text)
-               print("Sentences after splitting: ", sentences)
                if is_full_sentence(sentences[-1]):
-                   print("Last sentence is a full sentence.")
                    for sentence in sentences:
-                       print("Streaming sentence: ", sentence)
-                       stream_tts_to_user(sentence)
+                       await stream_or_play_tts(sentence)
                    accumulated_text = ""
-                   print("Reset accumulated text.")
                else:
-                   print("Last sentence is not a full sentence.")
                    for sentence in sentences[:-1]:
-                       print("Streaming sentence: ", sentence)
-                       stream_tts_to_user(sentence)
+                       await stream_or_play_tts(sentence)
                    accumulated_text = sentences[-1]
-                   print("Accumulated text is now the last sentence: ", accumulated_text)
 
            # If we have a new message, save our progress and go back to the top
-           if not receive_queue.empty():
+           if not from_user.empty():
                with open(conversation_history_path, 'w') as file:
                    json.dump(interpreter.messages, file)
                break
 
-def stream_tts_to_user(sentence):
-    send_queue.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "start": True})
-    audio_bytes = tts(sentence)
-    send_queue.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "content": str(audio_bytes)})
-    send_queue.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "end": True})
+async def stream_or_play_tts(sentence):
 
-# Create a thread for the queue listener
-queue_thread = Thread(target=queue_listener)
+    if os.getenv('TTS_RUNNER') == "server":
+        tts(sentence, play_audio=True)
+    else:
+        await to_device.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "start": True})
+        audio_bytes = tts(sentence, play_audio=False)
+        await to_device.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "content": str(audio_bytes)})
+        await to_device.put({"role": "assistant", "type": "audio", "format": "audio/mp3", "end": True})
 
-# Start the queue listener thread
-queue_thread.start()
+from uvicorn import Config, Server
 
 # Run the FastAPI app
 if __name__ == "__main__":
-    server_url = os.getenv('SERVER_URL')
-    if not server_url:
-        raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
-    parsed_url = urllib.parse.urlparse(server_url)
-    print("Starting `server.py`...")
-    uvicorn.run(app, host=parsed_url.hostname, port=parsed_url.port)
+
+    async def main():
+        # Start listening to the user
+        asyncio.create_task(user_listener())
+
+        # Start watching the kernel if it's your job to do that
+        if os.getenv('CODE_RUNNER') == "server":
+            asyncio.create_task(put_kernel_messages_into_queue(from_computer))
+
+        server_url = os.getenv('SERVER_URL')
+        if not server_url:
+            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
+        parsed_url = urllib.parse.urlparse(server_url)
+        print("Starting `server.py`...")
+
+        config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on')
+        server = Server(config)
+        await server.serve()
+
+    asyncio.run(main())
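The queue rename above also changes their types: to_device and from_user become asyncio.Queue so consumers can await them directly, while from_computer stays a synchronous queue.Queue because interpreter.chat runs synchronously. A minimal, self-contained sketch of that pattern follows; the demo names are stand-ins, not from the commit.

    # Hypothetical demo of the server's mixed queue types.
    import asyncio
    import queue

    from_computer = queue.Queue()   # sync: drained inside the blocking interpreter loop
    to_device = asyncio.Queue()     # async: awaited by the websocket sender

    async def demo_sender():
        # Mirrors send_messages(): awaiting the queue suspends only this task,
        # replacing the old run_in_executor(None, send_queue.get) workaround.
        while True:
            message = await to_device.get()
            print("would send over the websocket:", message)
            if "end" in message:
                break

    async def demo():
        await to_device.put({"role": "assistant", "type": "message", "content": "hi"})
        await to_device.put({"role": "assistant", "type": "message", "end": True})
        await demo_sender()

    if __name__ == "__main__":
        asyncio.run(demo())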
@@ -12,7 +12,7 @@ export DEVICE_START=True
 
 # Control where various operations happen— can be `device` or `server`.
 export CODE_RUNNER=server
-export TTS_RUNNER=device # If server, audio will be sent over websocket.
+export TTS_RUNNER=server # If device, audio will be sent over websocket.
 export STT_RUNNER=device # If server, audio will be sent over websocket.
 
 # Will expose the server publically and display that URL.
@@ -22,10 +22,14 @@ export SERVER_EXPOSE_PUBLICALLY=False
 
 # (for dev, reset the ports we were using)
 
-PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")
-lsof -ti tcp:$PORT | xargs kill
-PORT=$(echo $DEVICE_URL | grep -oE "[0-9]+")
-lsof -ti tcp:$PORT | xargs kill
+SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")
+if [ -n "$SERVER_PORT" ]; then
+    lsof -ti tcp:$SERVER_PORT | xargs kill
+fi
+DEVICE_PORT=$(echo $DEVICE_URL | grep -oE "[0-9]+")
+if [ -n "$DEVICE_PORT" ]; then
+    lsof -ti tcp:$DEVICE_PORT | xargs kill
+fi
 
 # Check the current Python version
 PYTHON_VERSION=$(python -V 2>&1 | cut -d " " -f 2 | cut -d "." -f 1-2)
OS/01/stt.py (29 lines changed)
@ -44,18 +44,21 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
||||||
os.remove(input_path)
|
os.remove(input_path)
|
||||||
os.remove(output_path)
|
os.remove(output_path)
|
||||||
|
|
||||||
def stt(audio_bytes: bytearray, mime_type):
|
def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
|
||||||
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
|
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
|
||||||
audio_file = open(wav_file_path, "rb")
|
return stt_wav(wav_file_path)
|
||||||
try:
|
|
||||||
transcript = client.audio.transcriptions.create(
|
|
||||||
model="whisper-1",
|
|
||||||
file=audio_file,
|
|
||||||
response_format="text"
|
|
||||||
)
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
print("openai.BadRequestError:", e)
|
|
||||||
return None
|
|
||||||
|
|
||||||
print("Exciting transcription result:", transcript)
|
def stt_wav(wav_file_path: str):
|
||||||
return transcript
|
audio_file = open(wav_file_path, "rb")
|
||||||
|
try:
|
||||||
|
transcript = client.audio.transcriptions.create(
|
||||||
|
model="whisper-1",
|
||||||
|
file=audio_file,
|
||||||
|
response_format="text"
|
||||||
|
)
|
||||||
|
except openai.BadRequestError as e:
|
||||||
|
print("openai.BadRequestError:", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
print("Exciting transcription result:", transcript)
|
||||||
|
return transcript
|
||||||
|
|
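To show how the split is meant to be called: stt_bytes converts raw audio with ffmpeg and defers to stt_wav, which the device can now call directly on its own recording. A hedged usage sketch follows; the file path is a placeholder, and both paths assume OpenAI credentials plus ffmpeg are available.

    # Hypothetical caller of the refactored helpers in OS/01/stt.py.
    from stt import stt_bytes, stt_wav

    # Server path: audio accumulated from the websocket as raw bytes.
    with open("recording.wav", "rb") as f:          # placeholder file
        audio_buffer = bytearray(f.read())
    text_from_bytes = stt_bytes(audio_buffer, mime_type="audio/wav")

    # Device path (STT_RUNNER=device): transcribe the local WAV directly.
    text_from_file = stt_wav("recording.wav")       # placeholder path

    print(text_from_bytes, text_from_file)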
OS/01/tts.py (11 lines changed)
@@ -9,7 +9,7 @@ from pydub.playback import play
 
 client = OpenAI()
 
-def tts(text):
+def tts(text, play_audio):
     response = client.audio.speech.create(
         model="tts-1",
         voice="alloy",
|
@ -19,9 +19,10 @@ def tts(text):
|
||||||
with tempfile.NamedTemporaryFile() as temp_file:
|
with tempfile.NamedTemporaryFile() as temp_file:
|
||||||
response.stream_to_file(temp_file.name)
|
response.stream_to_file(temp_file.name)
|
||||||
|
|
||||||
audio = AudioSegment.from_file(temp_file.name, format="mp3")
|
if play_audio:
|
||||||
# Gradual fade in and out over 0.2 seconds
|
audio = AudioSegment.from_file(temp_file.name, format="mp3")
|
||||||
audio = audio.fade_in(200).fade_out(200)
|
# Gradual fade in and out over 0.2 seconds
|
||||||
play(audio)
|
audio = audio.fade_in(200).fade_out(200)
|
||||||
|
play(audio)
|
||||||
|
|
||||||
return temp_file.read()
|
return temp_file.read()
|
||||||
|
|
|
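The new play_audio flag lets the same tts() serve both runners, as stream_or_play_tts does above. A short hedged sketch of the intended call pattern; the sentence is made up and an OpenAI key is assumed.

    # Hypothetical caller of tts() from OS/01/tts.py with the new flag.
    import os
    from tts import tts

    sentence = "Hello from the server."  # placeholder text

    if os.getenv("TTS_RUNNER") == "server":
        # Speak locally on the server; the returned MP3 bytes can be ignored.
        tts(sentence, play_audio=True)
    else:
        # Stay silent locally and ship the MP3 bytes to the device instead.
        audio_bytes = tts(sentence, play_audio=False)
        print(len(audio_bytes), "MP3 bytes ready to send over the websocket")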
@@ -1,5 +1,13 @@
+"""
+Watches the kernel. When it sees something that passes a filter,
+it sends POST request with that to /computer.
+"""
+
 import subprocess
+import time
+import requests
 import platform
+import os
 
 def get_kernel_messages():
     """
@@ -31,10 +39,11 @@ def custom_filter(message):
         return message
     else:
         return None
 
 last_messages = ""
 
 def check_filtered_kernel():
+    while True:
     messages = get_kernel_messages()
     messages.replace(last_messages, "")
     messages = messages.split("\n")
@@ -43,4 +52,4 @@ def check_filtered_kernel():
     for message in messages:
         if custom_filter(message):
             filtered_messages.append(message)
-    return filtered_messages
+    return "\n".join(filtered_messages)
 
@@ -0,0 +1,17 @@
+from .check_filtered_kernel import check_filtered_kernel
+import asyncio
+
+async def put_kernel_messages_into_queue(queue):
+    while True:
+        text = check_filtered_kernel()
+        if text:
+            if isinstance(queue, asyncio.Queue):
+                await queue.put({"role": "computer", "type": "console", "start": True})
+                await queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
+                await queue.put({"role": "computer", "type": "console", "end": True})
+            else:
+                queue.put({"role": "computer", "type": "console", "start": True})
+                queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
+                queue.put({"role": "computer", "type": "console", "end": True})
+
+        await asyncio.sleep(5)
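As a usage note, both ends pass their own queue to this helper: the server hands it the synchronous from_computer queue and the device hands it its send_queue, and the isinstance check picks the matching put. A hedged wiring sketch with stand-in queues:

    # Hypothetical wiring of the kernel watcher into both queue flavors.
    import asyncio
    import queue

    from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue

    async def demo():
        sync_q = queue.Queue()       # like the server's from_computer
        async_q = asyncio.Queue()    # like an asyncio-based device queue
        asyncio.create_task(put_kernel_messages_into_queue(sync_q))
        asyncio.create_task(put_kernel_messages_into_queue(async_q))
        await asyncio.sleep(6)       # let at least one 5-second poll complete

    if __name__ == "__main__":
        asyncio.run(demo())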