Two way websocket in user + settings
This commit is contained in:
parent
5f7d53f0b9
commit
63ab616082
|
@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
|
|||
In a while loop we watch the queue and handle it.
|
||||
"""
|
||||
|
||||
import os
|
||||
import ast
|
||||
import json
|
||||
import time
|
||||
|
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
|
|||
message = to_user.get()
|
||||
await websocket.send_json(message)
|
||||
|
||||
|
||||
def queue_listener():
|
||||
audio_file = bytearray()
|
||||
while True:
|
||||
|
@ -123,4 +123,4 @@ queue_thread.start()
|
|||
|
||||
# Run the FastAPI app
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
|
|||
data = {"language": "python", "code": code}
|
||||
|
||||
# Send the data to the /run endpoint
|
||||
response = requests.post("http://localhost:9000/run", json=data, stream=True)
|
||||
|
||||
computer_port = os.getenv('COMPUTER_PORT', '9000')
|
||||
response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
|
||||
# Stream the response
|
||||
for chunk in response.iter_content(chunk_size=100000000):
|
||||
if chunk: # filter out keep-alive new lines
|
||||
|
|
|
@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
|||
with open(input_path, 'wb') as f:
|
||||
f.write(audio)
|
||||
|
||||
# Check if the input file exists
|
||||
assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
|
||||
|
||||
# Export to wav
|
||||
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
||||
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
|
||||
|
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
|||
os.remove(input_path)
|
||||
os.remove(output_path)
|
||||
|
||||
|
||||
def stt(audio_bytes: bytearray, mime_type):
|
||||
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
|
||||
audio_file = open(wav_file_path, "rb")
|
||||
|
|
|
@ -7,6 +7,7 @@ import subprocess
|
|||
import time
|
||||
import requests
|
||||
import platform
|
||||
import os
|
||||
|
||||
class Device:
|
||||
def __init__(self, device_type, device_info):
|
||||
|
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
|
|||
if custom_filter(message):
|
||||
messages_for_core.append(message)
|
||||
if messages_for_core:
|
||||
requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core})
|
||||
|
||||
port = os.getenv('ASSISTANT_PORT', 8000)
|
||||
requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
Exposes an SSE streaming server endpoint at /run, which receives language and code,
|
||||
and streams the output.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from interpreter import interpreter
|
||||
import uvicorn
|
||||
|
@ -20,9 +20,9 @@ app = FastAPI()
|
|||
@app.post("/run")
|
||||
async def run_code(code: Code):
|
||||
def generator():
|
||||
for chunk in interpreter.computer.run(code.language, code.code, stream=True):
|
||||
for chunk in interpreter.computer.run(code.language, code.code):
|
||||
yield json.dumps(chunk)
|
||||
return StreamingResponse(generator())
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(app, host="0.0.0.0", port=9000)
|
||||
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))
|
||||
|
|
|
@ -7,3 +7,4 @@ uvicorn
|
|||
websockets
|
||||
python-dotenv
|
||||
ffmpeg-python
|
||||
textual
|
|
@ -1,12 +1,22 @@
|
|||
### SETTINGS
|
||||
|
||||
export MODE_01=LIGHT
|
||||
export ASSISTANT_PORT=8000
|
||||
export COMPUTER_PORT=8001
|
||||
|
||||
# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
|
||||
lsof -ti tcp:$ASSISTANT_PORT | xargs kill
|
||||
lsof -ti tcp:$COMPUTER_PORT | xargs kill
|
||||
|
||||
### SETUP
|
||||
|
||||
# INSTALL REQUIREMENTS
|
||||
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
brew update
|
||||
brew install portaudio ffmpeg
|
||||
fi
|
||||
pip install -r requirements.txt
|
||||
# if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
# brew update
|
||||
# brew install portaudio ffmpeg
|
||||
# fi
|
||||
# pip install -r requirements.txt
|
||||
|
||||
### COMPUTER
|
||||
|
||||
|
@ -28,6 +38,8 @@ python computer/run.py &
|
|||
# (disabled, we'll start with hosted services)
|
||||
# python core/llm/start.py &
|
||||
|
||||
sleep 6
|
||||
|
||||
# START ASSISTANT
|
||||
|
||||
python assistant/assistant.py &
|
||||
|
|
|
@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit
|
|||
|
||||
For now, just handles a spacebar being pressed— for the duration it's pressed,
|
||||
it should record audio.
|
||||
|
||||
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
|
||||
sends it to /user in LMC format (role: user, etc)
|
||||
|
||||
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pyaudio
|
||||
import threading
|
||||
import asyncio
|
||||
import websockets
|
||||
import websocket
|
||||
import time
|
||||
import json
|
||||
from pynput import keyboard
|
||||
import wave
|
||||
|
@ -35,6 +31,15 @@ recording = False # Flag to control recording state
|
|||
|
||||
ws_chunk_size = 4096 # Websocket stream chunk size
|
||||
|
||||
port = os.getenv('ASSISTANT_PORT', 8000)
|
||||
ws_url = f"ws://localhost:{port}/user"
|
||||
while True:
|
||||
try:
|
||||
ws = websocket.create_connection(ws_url)
|
||||
break
|
||||
except ConnectionRefusedError:
|
||||
time.sleep(1)
|
||||
|
||||
async def start_recording():
|
||||
global recording
|
||||
|
||||
|
|
|
@ -1,13 +1,123 @@
|
|||
"""
|
||||
Handles everything the user interacts through.
|
||||
import asyncio
|
||||
import threading
|
||||
import websockets
|
||||
import os
|
||||
import pyaudio
|
||||
from queue import Queue
|
||||
from pynput import keyboard
|
||||
import json
|
||||
|
||||
Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back.
|
||||
# Configuration for Audio Recording
|
||||
CHUNK = 1024 # Record in chunks of 1024 samples
|
||||
FORMAT = pyaudio.paInt16 # 16 bits per sample
|
||||
CHANNELS = 1 # Mono
|
||||
RATE = 44100 # Sample rate
|
||||
RECORDING = False # Flag to control recording state
|
||||
SPACEBAR_PRESSED = False # Flag to track spacebar press state
|
||||
|
||||
For now, just handles a spacebar being pressed— for the duration it's pressed,
|
||||
it should record audio.
|
||||
# Configuration for WebSocket
|
||||
PORT = os.getenv('ASSISTANT_PORT', '8000')
|
||||
WS_URL = f"ws://localhost:{PORT}/user"
|
||||
|
||||
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
|
||||
sends it to /user in LMC format (role: user, etc)
|
||||
# Initialize PyAudio
|
||||
p = pyaudio.PyAudio()
|
||||
|
||||
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
|
||||
"""
|
||||
# Queue for sending data
|
||||
data_queue = Queue()
|
||||
|
||||
import wave
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def record_audio():
    """Record microphone audio while RECORDING is set, then queue it for sending.

    Audio is read from a stream opened on the module-level PyAudio instance
    ``p`` and written to a temporary WAV file. When the ``RECORDING`` flag
    becomes false, the file is read back in ``CHUNK``-byte pieces; each piece
    is put on ``data_queue`` as an audio message, followed by a final message
    carrying ``"end": True``.

    The input stream, the WAV writer and the temporary file are always
    released, even if recording or queuing raises.
    """
    global RECORDING

    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
    print("Recording started...")

    # Create a temporary WAV file to store the audio data
    temp_dir = tempfile.gettempdir()
    wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        try:
            with wave.open(wav_path, 'wb') as wav_file:
                wav_file.setnchannels(CHANNELS)
                wav_file.setsampwidth(p.get_sample_size(FORMAT))
                wav_file.setframerate(RATE)

                # RECORDING is flipped to False elsewhere (see toggle_recording)
                while RECORDING:
                    data = stream.read(CHUNK, exception_on_overflow=False)
                    wav_file.writeframes(data)
        finally:
            # Release the audio device even if writing the WAV file failed
            stream.stop_stream()
            stream.close()
        print("Recording stopped.")

        # After recording is done, read and stream the audio file in chunks
        with open(wav_path, 'rb') as audio_file:
            byte_data = audio_file.read(CHUNK)
            while byte_data:
                # NOTE(review): content is the str() repr of the bytes object;
                # presumably the receiver parses it back - confirm against the server.
                data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
                byte_data = audio_file.read(CHUNK)

        data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
    finally:
        # The WAV file is only a staging area; do not leave it in the temp dir
        try:
            os.remove(wav_path)
        except FileNotFoundError:
            pass
|
||||
|
||||
|
||||
def toggle_recording(state):
    """Update the recording flags for a spacebar press or release.

    ``state`` is truthy for a key press and falsy for a key release. A press
    while the key is not already marked as held marks it held and, if no
    recording is in progress, starts ``record_audio`` on a new thread. A
    release while the key is marked as held clears both flags. Any other
    combination is ignored.
    """
    global RECORDING, SPACEBAR_PRESSED

    # Nothing to do when the event does not change the tracked key state
    if bool(state) == bool(SPACEBAR_PRESSED):
        return

    if not state:
        # Key released: clearing RECORDING lets the recorder loop finish
        SPACEBAR_PRESSED = False
        RECORDING = False
        return

    # Key pressed for the first time
    SPACEBAR_PRESSED = True
    if RECORDING:
        return
    RECORDING = True
    recorder = threading.Thread(target=record_audio)
    recorder.start()
|
||||
|
||||
async def websocket_communication():
    """Exchange messages with the server over the websocket at WS_URL.

    Runs forever: on each pass it sends every item currently in
    ``data_queue`` as JSON, waits up to one second for an incoming message
    and prints it if one arrives, then sleeps briefly before repeating.
    """
    async with websockets.connect(WS_URL) as connection:
        while True:
            # Flush everything that is queued right now
            while True:
                if data_queue.empty():
                    break
                outgoing = data_queue.get_nowait()
                payload = json.dumps(outgoing)
                await connection.send(payload)

            # Poll for a server message without blocking the send path for long
            try:
                reply = await asyncio.wait_for(connection.recv(), timeout=1.0)
            except asyncio.TimeoutError:
                # No message received within timeout period
                pass
            else:
                print(f"Received from server: {reply}")

            await asyncio.sleep(0.1)
|
||||
|
||||
|
||||
def on_press(key):
    """Keyboard callback: signal a key-down to toggle_recording for the spacebar."""
    is_spacebar = key == keyboard.Key.space
    if not is_spacebar:
        return
    toggle_recording(True)
|
||||
|
||||
def on_release(key):
    """Keyboard callback: handle spacebar release and ESC.

    Releasing the spacebar signals a key-up to ``toggle_recording``.
    Releasing ESC ends any in-progress recording and returns ``False``,
    which pynput treats as a request to stop the listener; this is what
    makes the "Press ESC to exit" prompt true.

    Returns:
        ``False`` when ESC was released, otherwise ``None``.
    """
    if key == keyboard.Key.space:
        toggle_recording(False)
    elif key == keyboard.Key.esc:
        # Stop recording first so the recorder thread is not left looping
        toggle_recording(False)
        return False
|
||||
|
||||
def main():
    """Start the websocket thread, then block on the keyboard listener.

    After the listener finishes, the module-level PyAudio instance is
    terminated.
    """
    import time

    # Fixed startup delay - presumably gives the server time to come up; confirm
    time.sleep(10)

    def run_websocket_loop():
        # Each call gets its own asyncio event loop via asyncio.run
        asyncio.run(websocket_communication())

    # Start the WebSocket communication in a separate daemon thread
    threading.Thread(target=run_websocket_loop, daemon=True).start()

    # Keyboard listener for spacebar press/release
    listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    with listener:
        print("Press the spacebar to start/stop recording. Press ESC to exit.")
        listener.join()

    p.terminate()


if __name__ == "__main__":
    main()
|
||||
|
|
Loading…
Reference in New Issue