Removed print statements, better audio playback
This commit is contained in:
		
							parent
							
								
									4640b4f1a0
								
							
						
					
					
						commit
						d2496fa8a2
					
				| 
						 | 
					@ -2243,13 +2243,13 @@ socks = ["socksio (==1.*)"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "huggingface-hub"
 | 
					name = "huggingface-hub"
 | 
				
			||||||
version = "0.23.4"
 | 
					version = "0.23.5"
 | 
				
			||||||
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 | 
					description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 | 
				
			||||||
optional = false
 | 
					optional = false
 | 
				
			||||||
python-versions = ">=3.8.0"
 | 
					python-versions = ">=3.8.0"
 | 
				
			||||||
files = [
 | 
					files = [
 | 
				
			||||||
    {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"},
 | 
					    {file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"},
 | 
				
			||||||
    {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"},
 | 
					    {file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"},
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[package.dependencies]
 | 
					[package.dependencies]
 | 
				
			||||||
| 
						 | 
					@ -3988,7 +3988,7 @@ server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=
 | 
				
			||||||
type = "git"
 | 
					type = "git"
 | 
				
			||||||
url = "https://github.com/OpenInterpreter/open-interpreter.git"
 | 
					url = "https://github.com/OpenInterpreter/open-interpreter.git"
 | 
				
			||||||
reference = "development"
 | 
					reference = "development"
 | 
				
			||||||
resolved_reference = "3db7e4b2dd93f48e1761ccbd24cd2b5a7985b06f"
 | 
					resolved_reference = "59409c2ddccb1a8d457099de7e24021afcba9ad8"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "openai"
 | 
					name = "openai"
 | 
				
			||||||
| 
						 | 
					@ -4032,10 +4032,10 @@ files = [
 | 
				
			||||||
[package.dependencies]
 | 
					[package.dependencies]
 | 
				
			||||||
numpy = [
 | 
					numpy = [
 | 
				
			||||||
    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
 | 
					    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
 | 
				
			||||||
    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
 | 
					 | 
				
			||||||
    {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
 | 
					    {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
 | 
				
			||||||
    {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
 | 
					    {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
 | 
				
			||||||
    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
 | 
					    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
 | 
				
			||||||
 | 
					    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,6 +3,7 @@ import websockets
 | 
				
			||||||
import pyaudio
 | 
					import pyaudio
 | 
				
			||||||
from pynput import keyboard
 | 
					from pynput import keyboard
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					from yaspin import yaspin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CHUNK = 1024
 | 
					CHUNK = 1024
 | 
				
			||||||
FORMAT = pyaudio.paInt16
 | 
					FORMAT = pyaudio.paInt16
 | 
				
			||||||
| 
						 | 
					@ -18,6 +19,8 @@ class Device:
 | 
				
			||||||
        self.recording = False
 | 
					        self.recording = False
 | 
				
			||||||
        self.input_stream = None
 | 
					        self.input_stream = None
 | 
				
			||||||
        self.output_stream = None
 | 
					        self.output_stream = None
 | 
				
			||||||
 | 
					        self.spinner = yaspin()
 | 
				
			||||||
 | 
					        self.play_audio = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def connect_with_retry(self, max_retries=50, retry_delay=2):
 | 
					    async def connect_with_retry(self, max_retries=50, retry_delay=2):
 | 
				
			||||||
        for attempt in range(max_retries):
 | 
					        for attempt in range(max_retries):
 | 
				
			||||||
| 
						 | 
					@ -26,7 +29,8 @@ class Device:
 | 
				
			||||||
                print("Connected to server.")
 | 
					                print("Connected to server.")
 | 
				
			||||||
                return
 | 
					                return
 | 
				
			||||||
            except ConnectionRefusedError:
 | 
					            except ConnectionRefusedError:
 | 
				
			||||||
                print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...")
 | 
					                if attempt % 4 == 0:
 | 
				
			||||||
 | 
					                    print(f"Waiting for the server to be ready...")
 | 
				
			||||||
                await asyncio.sleep(retry_delay)
 | 
					                await asyncio.sleep(retry_delay)
 | 
				
			||||||
        raise Exception("Failed to connect to the server after multiple attempts")
 | 
					        raise Exception("Failed to connect to the server after multiple attempts")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -37,7 +41,7 @@ class Device:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    # Send start flag
 | 
					                    # Send start flag
 | 
				
			||||||
                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
 | 
					                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
 | 
				
			||||||
                    print("Sending audio start message")
 | 
					                    #print("Sending audio start message")
 | 
				
			||||||
                    
 | 
					                    
 | 
				
			||||||
                    while self.recording:
 | 
					                    while self.recording:
 | 
				
			||||||
                        data = self.input_stream.read(CHUNK, exception_on_overflow=False)
 | 
					                        data = self.input_stream.read(CHUNK, exception_on_overflow=False)
 | 
				
			||||||
| 
						 | 
					@ -45,7 +49,7 @@ class Device:
 | 
				
			||||||
                    
 | 
					                    
 | 
				
			||||||
                    # Send stop flag
 | 
					                    # Send stop flag
 | 
				
			||||||
                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
 | 
					                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
 | 
				
			||||||
                    print("Sending audio end message")
 | 
					                    #print("Sending audio end message")
 | 
				
			||||||
                except Exception as e:
 | 
					                except Exception as e:
 | 
				
			||||||
                    print(f"Error in send_audio: {e}")
 | 
					                    print(f"Error in send_audio: {e}")
 | 
				
			||||||
            await asyncio.sleep(0.01)
 | 
					            await asyncio.sleep(0.01)
 | 
				
			||||||
| 
						 | 
					@ -56,26 +60,30 @@ class Device:
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                data = await self.websocket.recv()
 | 
					                data = await self.websocket.recv()
 | 
				
			||||||
                if isinstance(data, bytes) and not self.recording:
 | 
					                if isinstance(data, bytes) and not self.recording:
 | 
				
			||||||
                    self.output_stream.write(data)
 | 
					                    if self.play_audio:
 | 
				
			||||||
 | 
					                        self.output_stream.write(data)
 | 
				
			||||||
            except Exception as e:
 | 
					            except Exception as e:
 | 
				
			||||||
                print(f"Error in receive_audio: {e}")
 | 
					                print(f"Error in receive_audio: {e}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def on_press(self, key):
 | 
					    def on_press(self, key):
 | 
				
			||||||
        if key == keyboard.Key.space and not self.recording:
 | 
					        if key == keyboard.Key.space and not self.recording:
 | 
				
			||||||
            print("Space pressed, starting recording")
 | 
					            #print("Space pressed, starting recording")
 | 
				
			||||||
 | 
					            print("\n")
 | 
				
			||||||
 | 
					            self.spinner.start()
 | 
				
			||||||
            self.recording = True
 | 
					            self.recording = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def on_release(self, key):
 | 
					    def on_release(self, key):
 | 
				
			||||||
        if key == keyboard.Key.space:
 | 
					        if key == keyboard.Key.space:
 | 
				
			||||||
            print("Space released, stopping recording")
 | 
					            self.spinner.stop()
 | 
				
			||||||
 | 
					            #print("Space released, stopping recording")
 | 
				
			||||||
            self.recording = False
 | 
					            self.recording = False
 | 
				
			||||||
        elif key == keyboard.Key.esc:
 | 
					        # elif key == keyboard.Key.esc:
 | 
				
			||||||
            print("Esc pressed, stopping the program")
 | 
					        #     print("Esc pressed, stopping the program")
 | 
				
			||||||
            return False
 | 
					        #     return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def main(self):
 | 
					    async def main(self):
 | 
				
			||||||
        await self.connect_with_retry()
 | 
					        await self.connect_with_retry()
 | 
				
			||||||
        print("Hold spacebar to record. Press 'Esc' to quit.")
 | 
					        print("Hold spacebar to record. Press 'CTRL-C' to quit.")
 | 
				
			||||||
        listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
 | 
					        listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
 | 
				
			||||||
        listener.start()
 | 
					        listener.start()
 | 
				
			||||||
        await asyncio.gather(self.send_audio(), self.receive_audio())
 | 
					        await asyncio.gather(self.send_audio(), self.receive_audio())
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,9 +3,10 @@ from ..base_device import Device
 | 
				
			||||||
device = Device()
 | 
					device = Device()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main(server_url, debug):
 | 
					def main(server_url, debug, play_audio):
 | 
				
			||||||
    device.server_url = server_url
 | 
					    device.server_url = server_url
 | 
				
			||||||
    device.debug = debug
 | 
					    device.debug = debug
 | 
				
			||||||
 | 
					    device.play_audio = play_audio
 | 
				
			||||||
    device.start()
 | 
					    device.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,7 +10,7 @@ import wave
 | 
				
			||||||
import asyncio
 | 
					import asyncio
 | 
				
			||||||
from fastapi.responses import PlainTextResponse
 | 
					from fastapi.responses import PlainTextResponse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def start_server(server_host, server_port, profile, debug):
 | 
					def start_server(server_host, server_port, profile, debug, play_audio):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Load the profile module from the provided path
 | 
					    # Load the profile module from the provided path
 | 
				
			||||||
    spec = importlib.util.spec_from_file_location("profile", profile)
 | 
					    spec = importlib.util.spec_from_file_location("profile", profile)
 | 
				
			||||||
| 
						 | 
					@ -47,6 +47,8 @@ def start_server(server_host, server_port, profile, debug):
 | 
				
			||||||
    interpreter.server.host = server_host
 | 
					    interpreter.server.host = server_host
 | 
				
			||||||
    interpreter.server.port = server_port
 | 
					    interpreter.server.port = server_port
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    interpreter.play_audio = play_audio
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    interpreter.audio_chunks = []
 | 
					    interpreter.audio_chunks = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -100,12 +102,12 @@ def start_server(server_host, server_port, profile, debug):
 | 
				
			||||||
            if output["type"] == "message" and len(output.get("content", "")) > 0:
 | 
					            if output["type"] == "message" and len(output.get("content", "")) > 0:
 | 
				
			||||||
                self.tts.feed(output.get("content"))
 | 
					                self.tts.feed(output.get("content"))
 | 
				
			||||||
                if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
 | 
					                if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
 | 
				
			||||||
                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
 | 
					                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
 | 
				
			||||||
                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
 | 
					                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if output == {"role": "assistant", "type": "message", "end": True}:
 | 
					            if output == {"role": "assistant", "type": "message", "end": True}:
 | 
				
			||||||
                if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
 | 
					                if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
 | 
				
			||||||
                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
 | 
					                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
 | 
				
			||||||
                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
 | 
					                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
 | 
				
			||||||
                return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}
 | 
					                return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,6 +134,11 @@ def _run(
 | 
				
			||||||
    signal.signal(signal.SIGINT, handle_exit)
 | 
					    signal.signal(signal.SIGINT, handle_exit)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if server:
 | 
					    if server:
 | 
				
			||||||
 | 
					        # Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
 | 
				
			||||||
 | 
					        if client:
 | 
				
			||||||
 | 
					            play_audio = True
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            play_audio = False
 | 
				
			||||||
        server_thread = threading.Thread(
 | 
					        server_thread = threading.Thread(
 | 
				
			||||||
            target=start_server,
 | 
					            target=start_server,
 | 
				
			||||||
            args=(
 | 
					            args=(
 | 
				
			||||||
| 
						 | 
					@ -141,6 +146,7 @@ def _run(
 | 
				
			||||||
                server_port,
 | 
					                server_port,
 | 
				
			||||||
                profile,
 | 
					                profile,
 | 
				
			||||||
                debug,
 | 
					                debug,
 | 
				
			||||||
 | 
					                play_audio,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        server_thread.start()
 | 
					        server_thread.start()
 | 
				
			||||||
| 
						 | 
					@ -172,7 +178,13 @@ def _run(
 | 
				
			||||||
            f".clients.{client_type}.device", package="source"
 | 
					            f".clients.{client_type}.device", package="source"
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        client_thread = threading.Thread(target=module.main, args=[server_url, debug])
 | 
					        # Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
 | 
				
			||||||
 | 
					        if server:
 | 
				
			||||||
 | 
					            play_audio = False
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            play_audio = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        client_thread = threading.Thread(target=module.main, args=[server_url, debug, play_audio])
 | 
				
			||||||
        client_thread.start()
 | 
					        client_thread.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue