Two way websocket in user + settings
This commit is contained in:
parent
5f7d53f0b9
commit
63ab616082
|
@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
|
||||||
In a while loop we watch the queue and handle it.
|
In a while loop we watch the queue and handle it.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import ast
|
import ast
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||||
message = to_user.get()
|
message = to_user.get()
|
||||||
await websocket.send_json(message)
|
await websocket.send_json(message)
|
||||||
|
|
||||||
|
|
||||||
def queue_listener():
|
def queue_listener():
|
||||||
audio_file = bytearray()
|
audio_file = bytearray()
|
||||||
while True:
|
while True:
|
||||||
|
@ -123,4 +123,4 @@ queue_thread.start()
|
||||||
|
|
||||||
# Run the FastAPI app
|
# Run the FastAPI app
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
|
||||||
data = {"language": "python", "code": code}
|
data = {"language": "python", "code": code}
|
||||||
|
|
||||||
# Send the data to the /run endpoint
|
# Send the data to the /run endpoint
|
||||||
response = requests.post("http://localhost:9000/run", json=data, stream=True)
|
computer_port = os.getenv('COMPUTER_PORT', '9000')
|
||||||
|
response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
|
||||||
# Stream the response
|
# Stream the response
|
||||||
for chunk in response.iter_content(chunk_size=100000000):
|
for chunk in response.iter_content(chunk_size=100000000):
|
||||||
if chunk: # filter out keep-alive new lines
|
if chunk: # filter out keep-alive new lines
|
||||||
|
|
|
@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
||||||
with open(input_path, 'wb') as f:
|
with open(input_path, 'wb') as f:
|
||||||
f.write(audio)
|
f.write(audio)
|
||||||
|
|
||||||
|
# Check if the input file exists
|
||||||
|
assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
|
||||||
|
|
||||||
# Export to wav
|
# Export to wav
|
||||||
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
||||||
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
|
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
|
||||||
|
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
||||||
os.remove(input_path)
|
os.remove(input_path)
|
||||||
os.remove(output_path)
|
os.remove(output_path)
|
||||||
|
|
||||||
|
|
||||||
def stt(audio_bytes: bytearray, mime_type):
|
def stt(audio_bytes: bytearray, mime_type):
|
||||||
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
|
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
|
||||||
audio_file = open(wav_file_path, "rb")
|
audio_file = open(wav_file_path, "rb")
|
||||||
|
|
|
@ -7,6 +7,7 @@ import subprocess
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
import platform
|
import platform
|
||||||
|
import os
|
||||||
|
|
||||||
class Device:
|
class Device:
|
||||||
def __init__(self, device_type, device_info):
|
def __init__(self, device_type, device_info):
|
||||||
|
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
|
||||||
if custom_filter(message):
|
if custom_filter(message):
|
||||||
messages_for_core.append(message)
|
messages_for_core.append(message)
|
||||||
if messages_for_core:
|
if messages_for_core:
|
||||||
requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core})
|
port = os.getenv('ASSISTANT_PORT', 8000)
|
||||||
|
requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
Exposes a SSE streaming server endpoint at /run, which recieves language and code,
|
Exposes a SSE streaming server endpoint at /run, which recieves language and code,
|
||||||
and streams the output.
|
and streams the output.
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
import json
|
import json
|
||||||
from interpreter import interpreter
|
from interpreter import interpreter
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
@ -20,9 +20,9 @@ app = FastAPI()
|
||||||
@app.post("/run")
|
@app.post("/run")
|
||||||
async def run_code(code: Code):
|
async def run_code(code: Code):
|
||||||
def generator():
|
def generator():
|
||||||
for chunk in interpreter.computer.run(code.language, code.code, stream=True):
|
for chunk in interpreter.computer.run(code.language, code.code):
|
||||||
yield json.dumps(chunk)
|
yield json.dumps(chunk)
|
||||||
return StreamingResponse(generator())
|
return StreamingResponse(generator())
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
uvicorn.run(app, host="0.0.0.0", port=9000)
|
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))
|
||||||
|
|
|
@ -7,3 +7,4 @@ uvicorn
|
||||||
websockets
|
websockets
|
||||||
python-dotenv
|
python-dotenv
|
||||||
ffmpeg-python
|
ffmpeg-python
|
||||||
|
textual
|
|
@ -1,12 +1,22 @@
|
||||||
|
### SETTINGS
|
||||||
|
|
||||||
|
export MODE_01=LIGHT
|
||||||
|
export ASSISTANT_PORT=8000
|
||||||
|
export COMPUTER_PORT=8001
|
||||||
|
|
||||||
|
# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
|
||||||
|
lsof -ti tcp:$ASSISTANT_PORT | xargs kill
|
||||||
|
lsof -ti tcp:$COMPUTER_PORT | xargs kill
|
||||||
|
|
||||||
### SETUP
|
### SETUP
|
||||||
|
|
||||||
# INSTALL REQUIREMENTS
|
# INSTALL REQUIREMENTS
|
||||||
|
|
||||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
# if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||||
brew update
|
# brew update
|
||||||
brew install portaudio ffmpeg
|
# brew install portaudio ffmpeg
|
||||||
fi
|
# fi
|
||||||
pip install -r requirements.txt
|
# pip install -r requirements.txt
|
||||||
|
|
||||||
### COMPUTER
|
### COMPUTER
|
||||||
|
|
||||||
|
@ -28,6 +38,8 @@ python computer/run.py &
|
||||||
# (disabled, we'll start with hosted services)
|
# (disabled, we'll start with hosted services)
|
||||||
# python core/llm/start.py &
|
# python core/llm/start.py &
|
||||||
|
|
||||||
|
sleep 6
|
||||||
|
|
||||||
# START ASSISTANT
|
# START ASSISTANT
|
||||||
|
|
||||||
python assistant/assistant.py &
|
python assistant/assistant.py &
|
||||||
|
|
|
@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit
|
||||||
|
|
||||||
For now, just handles a spacebar being pressed— for the duration it's pressed,
|
For now, just handles a spacebar being pressed— for the duration it's pressed,
|
||||||
it should record audio.
|
it should record audio.
|
||||||
|
|
||||||
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
|
|
||||||
sends it to /user in LMC format (role: user, etc)
|
|
||||||
|
|
||||||
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import threading
|
import threading
|
||||||
import asyncio
|
import asyncio
|
||||||
import websockets
|
import websocket
|
||||||
|
import time
|
||||||
import json
|
import json
|
||||||
from pynput import keyboard
|
from pynput import keyboard
|
||||||
import wave
|
import wave
|
||||||
|
@ -35,6 +31,15 @@ recording = False # Flag to control recording state
|
||||||
|
|
||||||
ws_chunk_size = 4096 # Websocket stream chunk size
|
ws_chunk_size = 4096 # Websocket stream chunk size
|
||||||
|
|
||||||
|
port = os.getenv('ASSISTANT_PORT', 8000)
|
||||||
|
ws_url = f"ws://localhost:{port}/user"
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
ws = websocket.create_connection(ws_url)
|
||||||
|
break
|
||||||
|
except ConnectionRefusedError:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
async def start_recording():
|
async def start_recording():
|
||||||
global recording
|
global recording
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,123 @@
|
||||||
"""
|
import asyncio
|
||||||
Handles everything the user interacts through.
|
import threading
|
||||||
|
import websockets
|
||||||
|
import os
|
||||||
|
import pyaudio
|
||||||
|
from queue import Queue
|
||||||
|
from pynput import keyboard
|
||||||
|
import json
|
||||||
|
|
||||||
Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back.
|
# Configuration for Audio Recording
|
||||||
|
CHUNK = 1024 # Record in chunks of 1024 samples
|
||||||
|
FORMAT = pyaudio.paInt16 # 16 bits per sample
|
||||||
|
CHANNELS = 1 # Mono
|
||||||
|
RATE = 44100 # Sample rate
|
||||||
|
RECORDING = False # Flag to control recording state
|
||||||
|
SPACEBAR_PRESSED = False # Flag to track spacebar press state
|
||||||
|
|
||||||
For now, just handles a spacebar being pressed— for the duration it's pressed,
|
# Configuration for WebSocket
|
||||||
it should record audio.
|
PORT = os.getenv('ASSISTANT_PORT', '8000')
|
||||||
|
WS_URL = f"ws://localhost:{PORT}/user"
|
||||||
|
|
||||||
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
|
# Initialize PyAudio
|
||||||
sends it to /user in LMC format (role: user, etc)
|
p = pyaudio.PyAudio()
|
||||||
|
|
||||||
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
|
# Queue for sending data
|
||||||
"""
|
data_queue = Queue()
|
||||||
|
|
||||||
|
import wave
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def record_audio():
|
||||||
|
"""Record audio from the microphone and add it to the queue."""
|
||||||
|
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
|
||||||
|
print("Recording started...")
|
||||||
|
global RECORDING
|
||||||
|
|
||||||
|
# Create a temporary WAV file to store the audio data
|
||||||
|
temp_dir = tempfile.gettempdir()
|
||||||
|
wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
||||||
|
wav_file = wave.open(wav_path, 'wb')
|
||||||
|
wav_file.setnchannels(CHANNELS)
|
||||||
|
wav_file.setsampwidth(p.get_sample_size(FORMAT))
|
||||||
|
wav_file.setframerate(RATE)
|
||||||
|
|
||||||
|
while RECORDING:
|
||||||
|
data = stream.read(CHUNK, exception_on_overflow=False)
|
||||||
|
wav_file.writeframes(data)
|
||||||
|
|
||||||
|
wav_file.close()
|
||||||
|
stream.stop_stream()
|
||||||
|
stream.close()
|
||||||
|
print("Recording stopped.")
|
||||||
|
|
||||||
|
# After recording is done, read and stream the audio file in chunks
|
||||||
|
with open(wav_path, 'rb') as audio_file:
|
||||||
|
byte_data = audio_file.read(CHUNK)
|
||||||
|
while byte_data:
|
||||||
|
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
|
||||||
|
byte_data = audio_file.read(CHUNK)
|
||||||
|
|
||||||
|
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
|
||||||
|
|
||||||
|
|
||||||
|
def toggle_recording(state):
|
||||||
|
"""Toggle the recording state."""
|
||||||
|
global RECORDING, SPACEBAR_PRESSED
|
||||||
|
if state and not SPACEBAR_PRESSED:
|
||||||
|
SPACEBAR_PRESSED = True
|
||||||
|
if not RECORDING:
|
||||||
|
RECORDING = True
|
||||||
|
threading.Thread(target=record_audio).start()
|
||||||
|
elif not state and SPACEBAR_PRESSED:
|
||||||
|
SPACEBAR_PRESSED = False
|
||||||
|
RECORDING = False
|
||||||
|
|
||||||
|
async def websocket_communication():
|
||||||
|
"""Handle WebSocket communication and listen for incoming messages."""
|
||||||
|
async with websockets.connect(WS_URL) as websocket:
|
||||||
|
while True:
|
||||||
|
# Send data from the queue to the server
|
||||||
|
while not data_queue.empty():
|
||||||
|
data = data_queue.get_nowait()
|
||||||
|
await websocket.send(json.dumps(data))
|
||||||
|
|
||||||
|
# Listen for incoming messages from the server
|
||||||
|
try:
|
||||||
|
incoming_message = await asyncio.wait_for(websocket.recv(), timeout=1.0)
|
||||||
|
print(f"Received from server: {incoming_message}")
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
# No message received within timeout period
|
||||||
|
pass
|
||||||
|
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
|
||||||
|
|
||||||
|
def on_press(key):
|
||||||
|
"""Detect spacebar press."""
|
||||||
|
if key == keyboard.Key.space:
|
||||||
|
toggle_recording(True)
|
||||||
|
|
||||||
|
def on_release(key):
|
||||||
|
"""Detect spacebar release."""
|
||||||
|
if key == keyboard.Key.space:
|
||||||
|
toggle_recording(False)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
import time
|
||||||
|
time.sleep(10)
|
||||||
|
# Start the WebSocket communication in a separate asyncio event loop
|
||||||
|
ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True)
|
||||||
|
ws_thread.start()
|
||||||
|
|
||||||
|
# Keyboard listener for spacebar press/release
|
||||||
|
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
|
||||||
|
print("Press the spacebar to start/stop recording. Press ESC to exit.")
|
||||||
|
listener.join()
|
||||||
|
|
||||||
|
p.terminate()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
Loading…
Reference in New Issue