8th Architecture

parent d0857f7170
commit 777ab42f75
@@ -1 +1 @@
-[{"role": "user", "type": "message", "content": "Hi.\n"}]
+[{"role": "user", "type": "message", "content": "Hi, can you hear me?\n"}]
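The conversation entry above follows the message shape used throughout this commit: plain dicts with role/type plus optional format, content, and start/end flags for chunked streams. A minimal sketch of that shape (the TypedDict and its name are illustrative, not something the repo defines):

```python
from typing import TypedDict

class Message(TypedDict, total=False):
    # Fields observed in this diff; the class itself is hypothetical.
    role: str      # "user" or "computer"
    type: str      # "message", "audio", or "console"
    format: str    # e.g. "audio/wav" or "output"
    content: str   # text, or a stringified chunk of audio bytes
    start: bool    # marks the beginning of a multi-part stream
    end: bool      # marks the end of a multi-part stream

greeting: Message = {"role": "user", "type": "message", "content": "Hi, can you hear me?\n"}
```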
@@ -19,7 +19,7 @@ import wave
 import tempfile
 from datetime import datetime
 from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
-from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue
+from utils.kernel import put_kernel_messages_into_queue
 from stt import stt_wav

 # Configuration for Audio Recording
@@ -71,28 +71,33 @@ def record_audio():
     stream.close()
     print("Recording stopped.")

-    # After recording is done, read and stream the audio file in chunks
-    with open(wav_path, 'rb') as audio_file:
-        byte_data = audio_file.read(CHUNK)
-        while byte_data:
-            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
-            byte_data = audio_file.read(CHUNK)
-
-    if os.getenv('STT_RUNNER') == "device":
-        text = stt_wav(wav_path)
-        send_queue.put({"role": "user", "type": "message", "content": text})
-
-    if os.getenv('STT_RUNNER') == "server":
-        # STT will happen on the server. we sent audio.
-        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
-    elif os.getenv('STT_RUNNER') == "device":
-        # STT will happen here, on the device. we sent text.
-        send_queue.put({"role": "user", "type": "message", "end": True})
+    duration = wav_file.getnframes() / RATE
+    if duration < 0.3:
+        # Just pressed it. Send stop message
+        if os.getenv('STT_RUNNER') == "device":
+            send_queue.put({"role": "user", "type": "message", "content": "stop"})
+            send_queue.put({"role": "user", "type": "message", "end": True})
+        else:
+            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""})
+            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
+    else:
+        if os.getenv('STT_RUNNER') == "device":
+            # Run stt then send text
+            text = stt_wav(wav_path)
+            send_queue.put({"role": "user", "type": "message", "content": text})
+            send_queue.put({"role": "user", "type": "message", "end": True})
+        else:
+            # Stream audio
+            with open(wav_path, 'rb') as audio_file:
+                byte_data = audio_file.read(CHUNK)
+                while byte_data:
+                    send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
+                    byte_data = audio_file.read(CHUNK)
+            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})

     if os.path.exists(wav_path):
         os.remove(wav_path)


 def toggle_recording(state):
     """Toggle the recording state."""
     global RECORDING, SPACEBAR_PRESSED
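The rewritten record_audio branches on how long the button was held and on the STT_RUNNER environment variable. A condensed, self-contained sketch of that decision tree (function name and return labels are illustrative):

```python
import os

def plan_after_recording(duration_seconds: float) -> str:
    """Summarize which path the new record_audio logic takes (illustrative helper)."""
    stt_on_device = os.getenv("STT_RUNNER") == "device"

    if duration_seconds < 0.3:
        # The button was only tapped: send a "stop" signal rather than audio.
        return "send text 'stop' + end" if stt_on_device else "send empty audio + end"
    if stt_on_device:
        # Transcribe locally with stt_wav(), then send the transcript as a message.
        return "run STT on device, send transcript + end"
    # Otherwise stream the raw WAV to the server in CHUNK-sized pieces; the server runs STT.
    return "stream wav chunks, send audio end marker"

print(plan_after_recording(1.2))  # e.g. a normal press-and-hold recording
```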
@@ -1,7 +1,6 @@
import os
import glob
import json
import requests
from pathlib import Path

def configure_interpreter(interpreter):
@@ -18,7 +18,7 @@ from tts import tts
 from pathlib import Path
 import asyncio
 import urllib.parse
-from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue
+from utils.kernel import put_kernel_messages_into_queue
 from i import configure_interpreter
 from interpreter import interpreter

@@ -49,7 +49,7 @@ fi
 # brew update
 # brew install portaudio ffmpeg
 # fi
-python -m pip install -r requirements.txt
+# python -m pip install -r requirements.txt

 ### START

@@ -60,5 +60,5 @@ def stt_wav(wav_file_path: str):
         print("openai.BadRequestError:", e)
         return None

-    print("Exciting transcription result:", transcript)
+    print("Transcription result:", transcript)
     return transcript
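Only the log line changes here, but stt_wav is the function the device-side STT path above relies on. A speculative sketch of a Whisper-backed implementation, consistent with the openai.BadRequestError handling shown (an assumption, not the repo's actual code):

```python
# Speculative sketch: the real stt_wav in stt.py may differ.
import openai

def stt_wav_sketch(wav_file_path: str):
    try:
        with open(wav_file_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
    except openai.BadRequestError as e:
        # Typically raised for clips that are too short or malformed.
        print("openai.BadRequestError:", e)
        return None

    print("Transcription result:", transcript)
    return transcript
```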
@@ -1,13 +1,6 @@
"""
Watches the kernel. When it sees something that passes a filter,
it sends POST request with that to /computer.
"""

import asyncio
import subprocess
import time
import requests
import platform
import os

def get_kernel_messages():
    """
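get_kernel_messages is truncated by the hunk, but the subprocess and platform imports suggest an OS-dependent shell-out. A purely illustrative guess at its shape (not the repo's code):

```python
# Illustrative only: one plausible way to read recent kernel log lines.
import platform
import subprocess

def get_kernel_messages_sketch() -> str:
    if platform.system() == "Darwin":
        # macOS: query the unified log for recent output.
        cmd = ["log", "show", "--last", "5m"]
    else:
        # Linux: dmesg is the usual source of kernel messages.
        cmd = ["dmesg"]
    return subprocess.run(cmd, capture_output=True, text=True).stdout
```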
@@ -43,13 +36,28 @@ def custom_filter(message):
last_messages = ""

def check_filtered_kernel():
    messages = get_kernel_messages()
    messages.replace(last_messages, "")
    messages = messages.split("\n")

    filtered_messages = []
    for message in messages:
        if custom_filter(message):
            filtered_messages.append(message)

    return "\n".join(filtered_messages)

async def put_kernel_messages_into_queue(queue):
    while True:
        messages = get_kernel_messages()
        messages.replace(last_messages, "")
        messages = messages.split("\n")
        text = check_filtered_kernel()
        if text:
            if isinstance(queue, asyncio.Queue):
                await queue.put({"role": "computer", "type": "console", "start": True})
                await queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
                await queue.put({"role": "computer", "type": "console", "end": True})
            else:
                queue.put({"role": "computer", "type": "console", "start": True})
                queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
                queue.put({"role": "computer", "type": "console", "end": True})

        filtered_messages = []
        for message in messages:
            if custom_filter(message):
                filtered_messages.append(message)
        return "\n".join(filtered_messages)
        await asyncio.sleep(5)
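put_kernel_messages_into_queue now lives in utils/kernel.py and accepts either an asyncio.Queue or a plain queue. A minimal consumer for the asyncio case might look like this (the consumer itself is not part of the commit):

```python
import asyncio

async def consume_kernel_messages(queue: asyncio.Queue) -> None:
    """Illustrative consumer for the console chunks produced above."""
    while True:
        chunk = await queue.get()
        if chunk.get("start"):
            print("--- kernel output begins ---")
        elif chunk.get("end"):
            print("--- kernel output ends ---")
        else:
            print(chunk.get("content", ""))

# Example wiring, assuming `from utils.kernel import put_kernel_messages_into_queue`:
# queue = asyncio.Queue()
# await asyncio.gather(put_kernel_messages_into_queue(queue), consume_kernel_messages(queue))
```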
@@ -1,17 +0,0 @@
-from .check_filtered_kernel import check_filtered_kernel
-import asyncio
-
-async def put_kernel_messages_into_queue(queue):
-    while True:
-        text = check_filtered_kernel()
-        if text:
-            if isinstance(queue, asyncio.Queue):
-                await queue.put({"role": "computer", "type": "console", "start": True})
-                await queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
-                await queue.put({"role": "computer", "type": "console", "end": True})
-            else:
-                queue.put({"role": "computer", "type": "console", "start": True})
-                queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
-                queue.put({"role": "computer", "type": "console", "end": True})
-
-        await asyncio.sleep(5)
@@ -1,28 +0,0 @@
-"""
-Exposes a SSE streaming server endpoint at /run, which recieves language and code,
-and streams the output.
-"""
-import os
-import json
-from interpreter import interpreter
-import uvicorn
-
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-
-class Code(BaseModel):
-    language: str
-    code: str
-
-app = FastAPI()
-
-@app.post("/run")
-async def run_code(code: Code):
-    def generator():
-        for chunk in interpreter.computer.run(code.language, code.code):
-            yield json.dumps(chunk)
-    return StreamingResponse(generator())
-
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))
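For reference, the deleted file exposed a POST /run endpoint that streamed interpreter output chunk by chunk. A client for it would have looked roughly like this sketch (illustrative; the endpoint is gone after this commit):

```python
# Sketch of a client for the removed /run streaming endpoint.
import requests

def run_code_remotely(language: str, code: str, host: str = "http://localhost:9000") -> None:
    payload = {"language": language, "code": code}
    with requests.post(f"{host}/run", json=payload, stream=True) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            # Each chunk is a json.dumps()'d dict from interpreter.computer.run().
            print(chunk, end="")

# run_code_remotely("python", "print('hello')")
```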