add different sample rates for mic and speakers on 01
parent: 3642905ca3
commit: 2d15bae1ad
(File diff suppressed because it is too large.)
@@ -11,7 +11,7 @@

 ###
 from pynput import keyboard
+from .utils.bytes_to_wav import bytes_to_wav
 from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
 from RealtimeSTT import AudioToTextRecorder
 import time
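The new import pulls in a bytes_to_wav helper whose implementation is not part of this diff. As a rough idea of what such a helper does, the sketch below wraps raw PCM bytes in a WAV container at an explicit sample rate; the mono, 16-bit, 16 kHz parameters are illustrative assumptions, not values taken from the repository.

import tempfile
import wave

def bytes_to_wav_sketch(audio_bytes: bytes, sample_rate: int = 16000) -> str:
    # Write raw PCM into a temporary .wav container and return its path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        path = f.name
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)             # mono (assumption)
        wav_file.setsampwidth(2)             # 16-bit samples (assumption)
        wav_file.setframerate(sample_rate)   # microphone sample rate (assumption)
        wav_file.writeframes(audio_bytes)
    return path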
@@ -23,6 +23,7 @@ import os
 class AsyncInterpreter:
     def __init__(self, interpreter):
         self.interpreter = interpreter
+        self.audio_chunks = []

         # STT
         self.stt = AudioToTextRecorder(
@@ -73,6 +74,7 @@ class AsyncInterpreter:
         if isinstance(chunk, bytes):
             # It's probably a chunk of audio
             self.stt.feed_audio(chunk)
+            self.audio_chunks.append(chunk)
             # print("INTERPRETER FEEDING AUDIO")

         else:
@@ -171,6 +173,12 @@ class AsyncInterpreter:

         message = self.stt.text()

+        if self.audio_chunks:
+            audio_bytes = bytearray(b"".join(self.audio_chunks))
+            wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
+            print("wav_file_path ", wav_file_path)
+            self.audio_chunks = []
+
         print(message)

         # Feed generate to RealtimeTTS
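Since the commit is about using different sample rates for the microphone and the speakers, a quick way to see what format the buffered mic audio was written in is to inspect the generated file. The snippet below is a standalone, standard-library check; wav_file_path stands in for whatever path the server prints above.

import wave

def describe_wav(wav_file_path: str) -> None:
    # Print the basic format of the WAV written from the buffered mic audio.
    with wave.open(wav_file_path, "rb") as wav_file:
        print("channels:    ", wav_file.getnchannels())
        print("sample rate: ", wav_file.getframerate())
        print("sample width:", wav_file.getsampwidth(), "bytes")
        print("duration:    ", wav_file.getnframes() / wav_file.getframerate(), "s")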
@@ -181,8 +189,8 @@ class AsyncInterpreter:
         text_iterator = self.generate(message, start_interpreter)

         self.tts.feed(text_iterator)
-        self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
+        if not self.tts.is_playing():
+            self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)

         while True:
             await asyncio.sleep(0.1)
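The is_playing() guard means text from later turns is only fed into the already-running stream instead of kicking off a second play_async(). A minimal standalone sketch of that pattern, using only the RealtimeTTS calls that appear in this diff (running it would require the RealtimeTTS package and an OpenAI API key in the environment):

from RealtimeTTS import TextToAudioStream, OpenAIEngine

stream = TextToAudioStream(OpenAIEngine())  # assumes OPENAI_API_KEY is set

def speak(text_iterator):
    # Always feed the new text; only start playback if nothing is playing yet,
    # so later turns join the running stream rather than opening a second one.
    stream.feed(text_iterator)
    if not stream.is_playing():
        stream.play_async()

speak(iter(["Hello from the 01 speaker path."]))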
@@ -5,7 +5,7 @@ from interpreter import interpreter

 # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "elevenlabs"
+interpreter.tts = "openai"

 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"
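The profile switches the TTS provider string from "elevenlabs" to "openai". How the server maps that string to an engine is not shown in this diff; a hypothetical version of such a lookup, using the engines imported in the first hunk, might look like:

from RealtimeTTS import CoquiEngine, ElevenlabsEngine, OpenAIEngine

def engine_class_for(tts_provider: str):
    # Hypothetical mapping for illustration; the real lookup lives elsewhere
    # in the 01 codebase and may differ. The caller constructs the class with
    # whatever credentials or model settings it needs.
    engines = {
        "openai": OpenAIEngine,
        "elevenlabs": ElevenlabsEngine,
        "coqui": CoquiEngine,
    }
    return engines[tts_provider]

print(engine_class_for("openai").__name__)  # OpenAIEngine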