Removed print statements, better audio playback
This commit is contained in:
parent
4640b4f1a0
commit
d2496fa8a2
|
@ -2243,13 +2243,13 @@ socks = ["socksio (==1.*)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "huggingface-hub"
|
name = "huggingface-hub"
|
||||||
version = "0.23.4"
|
version = "0.23.5"
|
||||||
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8.0"
|
python-versions = ">=3.8.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"},
|
{file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"},
|
||||||
{file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"},
|
{file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
@ -3988,7 +3988,7 @@ server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/OpenInterpreter/open-interpreter.git"
|
url = "https://github.com/OpenInterpreter/open-interpreter.git"
|
||||||
reference = "development"
|
reference = "development"
|
||||||
resolved_reference = "3db7e4b2dd93f48e1761ccbd24cd2b5a7985b06f"
|
resolved_reference = "59409c2ddccb1a8d457099de7e24021afcba9ad8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openai"
|
name = "openai"
|
||||||
|
@ -4032,10 +4032,10 @@ files = [
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
numpy = [
|
numpy = [
|
||||||
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
|
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
|
||||||
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
|
|
||||||
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
||||||
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
||||||
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
|
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
|
||||||
|
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -3,6 +3,7 @@ import websockets
|
||||||
import pyaudio
|
import pyaudio
|
||||||
from pynput import keyboard
|
from pynput import keyboard
|
||||||
import json
|
import json
|
||||||
|
from yaspin import yaspin
|
||||||
|
|
||||||
CHUNK = 1024
|
CHUNK = 1024
|
||||||
FORMAT = pyaudio.paInt16
|
FORMAT = pyaudio.paInt16
|
||||||
|
@ -18,6 +19,8 @@ class Device:
|
||||||
self.recording = False
|
self.recording = False
|
||||||
self.input_stream = None
|
self.input_stream = None
|
||||||
self.output_stream = None
|
self.output_stream = None
|
||||||
|
self.spinner = yaspin()
|
||||||
|
self.play_audio = True
|
||||||
|
|
||||||
async def connect_with_retry(self, max_retries=50, retry_delay=2):
|
async def connect_with_retry(self, max_retries=50, retry_delay=2):
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
|
@ -26,7 +29,8 @@ class Device:
|
||||||
print("Connected to server.")
|
print("Connected to server.")
|
||||||
return
|
return
|
||||||
except ConnectionRefusedError:
|
except ConnectionRefusedError:
|
||||||
print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...")
|
if attempt % 4 == 0:
|
||||||
|
print(f"Waiting for the server to be ready...")
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
raise Exception("Failed to connect to the server after multiple attempts")
|
raise Exception("Failed to connect to the server after multiple attempts")
|
||||||
|
|
||||||
|
@ -37,7 +41,7 @@ class Device:
|
||||||
try:
|
try:
|
||||||
# Send start flag
|
# Send start flag
|
||||||
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
|
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
|
||||||
print("Sending audio start message")
|
#print("Sending audio start message")
|
||||||
|
|
||||||
while self.recording:
|
while self.recording:
|
||||||
data = self.input_stream.read(CHUNK, exception_on_overflow=False)
|
data = self.input_stream.read(CHUNK, exception_on_overflow=False)
|
||||||
|
@ -45,7 +49,7 @@ class Device:
|
||||||
|
|
||||||
# Send stop flag
|
# Send stop flag
|
||||||
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
|
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
|
||||||
print("Sending audio end message")
|
#print("Sending audio end message")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error in send_audio: {e}")
|
print(f"Error in send_audio: {e}")
|
||||||
await asyncio.sleep(0.01)
|
await asyncio.sleep(0.01)
|
||||||
|
@ -56,26 +60,30 @@ class Device:
|
||||||
try:
|
try:
|
||||||
data = await self.websocket.recv()
|
data = await self.websocket.recv()
|
||||||
if isinstance(data, bytes) and not self.recording:
|
if isinstance(data, bytes) and not self.recording:
|
||||||
self.output_stream.write(data)
|
if self.play_audio:
|
||||||
|
self.output_stream.write(data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error in receive_audio: {e}")
|
print(f"Error in receive_audio: {e}")
|
||||||
|
|
||||||
def on_press(self, key):
|
def on_press(self, key):
|
||||||
if key == keyboard.Key.space and not self.recording:
|
if key == keyboard.Key.space and not self.recording:
|
||||||
print("Space pressed, starting recording")
|
#print("Space pressed, starting recording")
|
||||||
|
print("\n")
|
||||||
|
self.spinner.start()
|
||||||
self.recording = True
|
self.recording = True
|
||||||
|
|
||||||
def on_release(self, key):
|
def on_release(self, key):
|
||||||
if key == keyboard.Key.space:
|
if key == keyboard.Key.space:
|
||||||
print("Space released, stopping recording")
|
self.spinner.stop()
|
||||||
|
#print("Space released, stopping recording")
|
||||||
self.recording = False
|
self.recording = False
|
||||||
elif key == keyboard.Key.esc:
|
# elif key == keyboard.Key.esc:
|
||||||
print("Esc pressed, stopping the program")
|
# print("Esc pressed, stopping the program")
|
||||||
return False
|
# return False
|
||||||
|
|
||||||
async def main(self):
|
async def main(self):
|
||||||
await self.connect_with_retry()
|
await self.connect_with_retry()
|
||||||
print("Hold spacebar to record. Press 'Esc' to quit.")
|
print("Hold spacebar to record. Press 'CTRL-C' to quit.")
|
||||||
listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
|
listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
|
||||||
listener.start()
|
listener.start()
|
||||||
await asyncio.gather(self.send_audio(), self.receive_audio())
|
await asyncio.gather(self.send_audio(), self.receive_audio())
|
||||||
|
|
|
@ -3,9 +3,10 @@ from ..base_device import Device
|
||||||
device = Device()
|
device = Device()
|
||||||
|
|
||||||
|
|
||||||
def main(server_url, debug):
|
def main(server_url, debug, play_audio):
|
||||||
device.server_url = server_url
|
device.server_url = server_url
|
||||||
device.debug = debug
|
device.debug = debug
|
||||||
|
device.play_audio = play_audio
|
||||||
device.start()
|
device.start()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ import wave
|
||||||
import asyncio
|
import asyncio
|
||||||
from fastapi.responses import PlainTextResponse
|
from fastapi.responses import PlainTextResponse
|
||||||
|
|
||||||
def start_server(server_host, server_port, profile, debug):
|
def start_server(server_host, server_port, profile, debug, play_audio):
|
||||||
|
|
||||||
# Load the profile module from the provided path
|
# Load the profile module from the provided path
|
||||||
spec = importlib.util.spec_from_file_location("profile", profile)
|
spec = importlib.util.spec_from_file_location("profile", profile)
|
||||||
|
@ -47,6 +47,8 @@ def start_server(server_host, server_port, profile, debug):
|
||||||
interpreter.server.host = server_host
|
interpreter.server.host = server_host
|
||||||
interpreter.server.port = server_port
|
interpreter.server.port = server_port
|
||||||
|
|
||||||
|
interpreter.play_audio = play_audio
|
||||||
|
|
||||||
|
|
||||||
interpreter.audio_chunks = []
|
interpreter.audio_chunks = []
|
||||||
|
|
||||||
|
@ -100,12 +102,12 @@ def start_server(server_host, server_port, profile, debug):
|
||||||
if output["type"] == "message" and len(output.get("content", "")) > 0:
|
if output["type"] == "message" and len(output.get("content", "")) > 0:
|
||||||
self.tts.feed(output.get("content"))
|
self.tts.feed(output.get("content"))
|
||||||
if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
|
if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
|
||||||
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
|
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
|
||||||
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
||||||
|
|
||||||
if output == {"role": "assistant", "type": "message", "end": True}:
|
if output == {"role": "assistant", "type": "message", "end": True}:
|
||||||
if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
|
if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
|
||||||
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
|
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
|
||||||
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
||||||
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}
|
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}
|
||||||
|
|
||||||
|
|
|
@ -134,6 +134,11 @@ def _run(
|
||||||
signal.signal(signal.SIGINT, handle_exit)
|
signal.signal(signal.SIGINT, handle_exit)
|
||||||
|
|
||||||
if server:
|
if server:
|
||||||
|
# Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
|
||||||
|
if client:
|
||||||
|
play_audio = True
|
||||||
|
else:
|
||||||
|
play_audio = False
|
||||||
server_thread = threading.Thread(
|
server_thread = threading.Thread(
|
||||||
target=start_server,
|
target=start_server,
|
||||||
args=(
|
args=(
|
||||||
|
@ -141,6 +146,7 @@ def _run(
|
||||||
server_port,
|
server_port,
|
||||||
profile,
|
profile,
|
||||||
debug,
|
debug,
|
||||||
|
play_audio,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
server_thread.start()
|
server_thread.start()
|
||||||
|
@ -172,7 +178,13 @@ def _run(
|
||||||
f".clients.{client_type}.device", package="source"
|
f".clients.{client_type}.device", package="source"
|
||||||
)
|
)
|
||||||
|
|
||||||
client_thread = threading.Thread(target=module.main, args=[server_url, debug])
|
# When a server is also running on this device it plays the audio itself, so the client stays silent. Needless pops and clicks otherwise!
|
||||||
|
if server:
|
||||||
|
play_audio = False
|
||||||
|
else:
|
||||||
|
play_audio = True
|
||||||
|
|
||||||
|
client_thread = threading.Thread(target=module.main, args=[server_url, debug, play_audio])
|
||||||
client_thread.start()
|
client_thread.start()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in New Issue