`bash OS/01/start.sh`
This commit is contained in:
parent
43d3c4e266
commit
525fa27ceb
|
@ -6,7 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
|
|||
In a while loop we watch the queue and handle it.
|
||||
"""
|
||||
|
||||
import os
|
||||
from starlette.websockets import WebSocketDisconnect
|
||||
import ast
|
||||
import json
|
||||
import time
|
||||
|
@ -21,12 +21,12 @@ from starlette.websockets import WebSocket
|
|||
from create_interpreter import create_interpreter
|
||||
from stt import stt
|
||||
from tts import tts
|
||||
from pathlib import Path
|
||||
|
||||
# Create interpreter
|
||||
interpreter = create_interpreter()
|
||||
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
conversation_history_path = os.path.join(script_dir, 'conversations', 'user.json')
|
||||
conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json'
|
||||
|
||||
# Create Queue objects
|
||||
to_user = queue.Queue()
|
||||
|
@ -49,11 +49,16 @@ async def read_computer(item: dict):
|
|||
async def websocket_endpoint(websocket: WebSocket):
    """Relay messages between one WebSocket client and the assistant queues.

    Every JSON message received from the client is put on `to_assistant`.
    After each received message, everything currently waiting in `to_user`
    is sent back to the client as JSON.

    Args:
        websocket: The incoming WebSocket connection; it is accepted here.
    """
    await websocket.accept()
    while True:
        try:
            data = await websocket.receive_json()
            to_assistant.put(data)
            # NOTE(review): queued replies are only flushed right after a
            # client message arrives, so they wait until the client sends
            # something again.
            while not to_user.empty():
                message = to_user.get()
                print("sending a message!")
                await websocket.send_json(message)
        except WebSocketDisconnect:
            # The client is gone. Looping back would call receive_json() on a
            # closed connection, so stop serving this socket instead.
            break
|
||||
|
||||
|
||||
def queue_listener():
|
||||
audio_file = bytearray()
|
||||
|
@ -89,25 +94,32 @@ def queue_listener():
|
|||
|
||||
accumulated_text = ""
|
||||
|
||||
for chunk in interpreter.chat(messages):
|
||||
for chunk in interpreter.chat(messages, stream=True):
|
||||
|
||||
# Send it to the user
|
||||
to_user.put(chunk)
|
||||
|
||||
# Speak full sentences out loud
|
||||
if chunk["type"] == "assistant":
|
||||
if chunk["role"] == "assistant" and "content" in chunk:
|
||||
print("Chunk role is assistant and content is present in chunk.")
|
||||
accumulated_text += chunk["content"]
|
||||
print("Accumulated text: ", accumulated_text)
|
||||
sentences = split_into_sentences(accumulated_text)
|
||||
print("Sentences after splitting: ", sentences)
|
||||
if is_full_sentence(sentences[-1]):
|
||||
print("Last sentence is a full sentence.")
|
||||
for sentence in sentences:
|
||||
for audio_chunk in tts(sentence):
|
||||
to_user.put(audio_chunk)
|
||||
print("Streaming sentence: ", sentence)
|
||||
stream_tts_to_user(sentence)
|
||||
accumulated_text = ""
|
||||
print("Reset accumulated text.")
|
||||
else:
|
||||
print("Last sentence is not a full sentence.")
|
||||
for sentence in sentences[:-1]:
|
||||
for audio_chunk in tts(sentence):
|
||||
to_user.put(audio_chunk)
|
||||
print("Streaming sentence: ", sentence)
|
||||
stream_tts_to_user(sentence)
|
||||
accumulated_text = sentences[-1]
|
||||
print("Accumulated text is now the last sentence: ", accumulated_text)
|
||||
|
||||
# If we have a new message, save our progress and go back to the top
|
||||
if not to_assistant.empty():
|
||||
|
@ -115,6 +127,12 @@ def queue_listener():
|
|||
json.dump(interpreter.messages, file)
|
||||
break
|
||||
|
||||
def stream_tts_to_user(sentence):
    """Queue the spoken form of `sentence` for the user.

    Puts three messages on `to_user`, in order: a start marker, the audio
    returned by `tts(sentence)` converted with `str()`, and an end marker.
    """
    envelope = {"role": "assistant", "type": "audio", "format": "audio/mp3"}

    # The start marker goes out before synthesis begins.
    to_user.put({**envelope, "start": True})
    payload = str(tts(sentence))
    to_user.put({**envelope, "content": payload})
    to_user.put({**envelope, "end": True})
|
||||
|
||||
# Create a thread for the queue listener
|
||||
queue_thread = Thread(target=queue_listener)
|
||||
|
||||
|
|
|
@ -38,8 +38,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
|
|||
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
||||
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
|
||||
|
||||
print(f"Temporary file path: {output_path}")
|
||||
|
||||
try:
|
||||
yield output_path
|
||||
finally:
|
||||
|
|
|
@ -2,41 +2,18 @@
|
|||
Defines a function which takes text and returns the synthesized speech audio as bytes.
|
||||
"""
|
||||
|
||||
from openai import OpenAI
|
||||
import pydub
|
||||
import pydub.playback
|
||||
import tempfile
|
||||
import os
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI()
|
||||
chunk_size = 1024
|
||||
read_chunk_size = 4096
|
||||
|
||||
def tts(text):
    """Convert `text` to speech with the OpenAI TTS API and return the audio.

    Args:
        text: The text to synthesize.

    Returns:
        bytes: The complete MP3-encoded audio for `text`.
    """
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
        response_format="mp3",
    )
    # Read the payload straight from the response instead of bouncing it
    # through a NamedTemporaryFile: a named temporary file cannot be reopened
    # by name on Windows while it is still open, and the round trip through
    # disk adds nothing.
    return response.read()
|
||||
|
|
|
@ -7,4 +7,5 @@ uvicorn
|
|||
websockets
|
||||
python-dotenv
|
||||
ffmpeg-python
|
||||
textual
|
||||
textual
|
||||
pydub
|
|
@ -6,6 +6,8 @@ import pyaudio
|
|||
from queue import Queue
|
||||
from pynput import keyboard
|
||||
import json
|
||||
import pydub
|
||||
import ast
|
||||
|
||||
# Configuration for Audio Recording
|
||||
CHUNK = 1024 # Record in chunks of 1024 samples
|
||||
|
@ -77,22 +79,50 @@ def toggle_recording(state):
|
|||
|
||||
def _handle_audio_message(chunk, audio_buffer):
    """Apply one server audio message to `audio_buffer`, playing it on "end".

    Args:
        chunk: Decoded message dict whose "type" is "audio". It may carry a
            "start" marker, a "content" payload, and/or an "end" marker.
        audio_buffer: bytearray accumulating the MP3 data of the current clip.
    """
    if "start" in chunk:
        print("Start of audio data received.")
        audio_buffer.clear()
    if "content" in chunk:
        print("Audio content received.")
        # The payload is the textual repr of a bytes object; literal_eval
        # turns it back into bytes without evaluating arbitrary code.
        audio_buffer.extend(bytes(ast.literal_eval(chunk["content"])))
    if "end" in chunk:
        print("End of audio data received.")
        # Function-scope imports: `import pydub` alone does not load the
        # playback submodule, and io is only needed here.
        import io
        from pydub.playback import play

        # Decode from memory so no temporary file has to be reopened by name.
        seg = pydub.AudioSegment.from_mp3(io.BytesIO(bytes(audio_buffer)))
        print("Playing received audio.")
        play(seg)


async def websocket_communication():
    """Handle WebSocket communication and listen for incoming messages.

    Connects to WS_URL, retrying once per second until the connection
    succeeds, and reconnects the same way if it is lost. While connected it
    forwards everything in `data_queue` to the server as JSON and hands
    incoming audio messages to `_handle_audio_message` for playback.
    """
    # Seconds to wait for a server message before checking data_queue again.
    # Without a timeout, recv() would block and queued recordings would never
    # be sent until the server happened to speak first.
    recv_timeout = 0.1
    audio_buffer = bytearray()

    while True:
        try:
            async with websockets.connect(WS_URL) as websocket:
                print("Press the spacebar to start/stop recording. Press ESC to exit.")

                while True:
                    # Send data from the queue to the server
                    while not data_queue.empty():
                        data = data_queue.get_nowait()
                        print(f"Sending data to the server: {data}")
                        await websocket.send(json.dumps(data))

                    # Listen for incoming messages from the server. A closed
                    # connection raises here and is handled by the outer
                    # except, which reconnects.
                    try:
                        raw = await asyncio.wait_for(websocket.recv(), timeout=recv_timeout)
                    except asyncio.TimeoutError:
                        # No message received within the timeout period
                        continue

                    # Best-effort handling: one malformed or unplayable
                    # message must not tear down the connection.
                    try:
                        # recv() yields the JSON text; decode it before
                        # treating it as a dict.
                        chunk = json.loads(raw)
                        print(f"Received from server: {str(chunk)[:100]}")

                        if chunk.get("type") == "audio":
                            print("Received audio data from server.")
                            _handle_audio_message(chunk, audio_buffer)
                    except Exception as e:
                        print(f"Error receiving data: {e}")
        except Exception as e:
            print(f"Websocket not ready, retrying... ({e})")
            await asyncio.sleep(1)
|
||||
|
||||
|
||||
def on_press(key):
|
||||
|
@ -101,9 +131,12 @@ def on_press(key):
|
|||
toggle_recording(True)
|
||||
|
||||
def on_release(key):
    """Handle a key release: spacebar stops recording, ESC quits."""
    if key == keyboard.Key.space:
        toggle_recording(False)
        return

    if key == keyboard.Key.esc:
        print("Exiting...")
        # Terminate the whole process immediately.
        os._exit(0)
|
||||
|
||||
def main():
|
||||
# Start the WebSocket communication in a separate asyncio event loop
|
||||
|
@ -112,7 +145,7 @@ def main():
|
|||
|
||||
# Keyboard listener for spacebar press/release
|
||||
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
|
||||
print("Press the spacebar to start/stop recording. Press ESC to exit.")
|
||||
print("In a moment, press the spacebar to start/stop recording. Press ESC to exit.")
|
||||
listener.join()
|
||||
|
||||
p.terminate()
|
||||
|
|
Loading…
Reference in New Issue