add different sample rates for mic and speakers on 01

2024-06-20 21:14:41 -07:00 · 2024-06-20 21:14:41 -07:00 · 2d15bae1ad
parent 3642905ca3
commit 2d15bae1ad
3 changed files with 890 additions and 871 deletions
--- a/software/source/clients/esp32/src/client/client.ino
+++ b/software/source/clients/esp32/src/client/client.ino
@ -541,7 +541,9 @@ void tryReconnectWiFi() {
 }
 void tryReconnectToServer() {
    preferences.begin("network", true); // Open Preferences with the "network" namespace in ReadOnly mode
-    String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
+    const String SERVER_URL="sterling-snail-conversely.ngrok-free.app";
+    String serverURL = SERVER_URL; // Use the hard-coded SERVER_URL; the stored-Preferences lookup is commented out below
+    // String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
    preferences.end(); // Close the Preferences

    if (!serverURL.isEmpty()) {
@ -573,6 +575,9 @@ void tryReconnectToServer() {

 #define MAX_DATA_LEN (1024 * 9)

+#define MIC_SAMPLE_RATE 16000
+#define SPEAKER_SAMPLE_RATE 24000  // or 22050 for OpenAI TTS
+
 uint8_t microphonedata0[1024 * 10];
 uint8_t speakerdata0[1024 * 1];
 int speaker_offset;
@ -615,7 +620,6 @@ void InitI2SSpeakerOrMic(int mode)
    i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
    i2s_config_t i2s_config = {
        .mode = (i2s_mode_t)(I2S_MODE_MASTER),
-        .sample_rate = 16000,
        .bits_per_sample =
            I2S_BITS_PER_SAMPLE_16BIT, // is fixed at 12bit, stereo, MSB
        .channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
@ -633,12 +637,14 @@ void InitI2SSpeakerOrMic(int mode)
    {
        i2s_config.mode =
            (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
+        i2s_config.sample_rate = MIC_SAMPLE_RATE;
    }
    else
    {
        i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
        i2s_config.use_apll = false;
        i2s_config.tx_desc_auto_clear = true;
+        i2s_config.sample_rate = SPEAKER_SAMPLE_RATE;
    }

    err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
@ -652,7 +658,9 @@ void InitI2SSpeakerOrMic(int mode)
    tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
    tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
    err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
-    err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
+    err += i2s_set_clk(SPEAKER_I2S_NUMBER,
+                        (mode == MODE_MIC) ? MIC_SAMPLE_RATE : SPEAKER_SAMPLE_RATE,      // set the sample rate here as well
+                        I2S_BITS_PER_SAMPLE_16BIT,
                        I2S_CHANNEL_MONO);
 }

@ -783,15 +791,18 @@ void setup() {
    Serial.setTxBufferSize(1024); // Set the transmit buffer size for the Serial object.

    WiFi.mode(WIFI_AP_STA); // Set WiFi mode to both AP and STA.
-
+    const String WIFI_NAME="gunner1";
+    const String WIFI_PASSWORD="startup1";
    // delay(100); // Short delay to ensure mode change takes effect
    // WiFi.softAPConfig(localIP, gatewayIP, subnetMask);
    // WiFi.softAP(ssid, password);
-    startSoftAccessPoint(ssid, password, localIP, gatewayIP);
+    // startSoftAccessPoint(ssid, password, localIP, gatewayIP);
+    connectToWifi(WIFI_NAME, WIFI_PASSWORD);
    setUpDNSServer(dnsServer, localIP);
+    tryReconnectToServer();

-    setUpWebserver(server, localIP);
-    tryReconnectWiFi();
+    // setUpWebserver(server, localIP);
+    // tryReconnectWiFi();
    // Print a welcome message to the Serial port.
    Serial.println("\n\nCaptive Test, V0.5.0 compiled " __DATE__ " " __TIME__ " by CD_FER");
    Serial.printf("%s-%d\n\r", ESP.getChipModel(), ESP.getChipRevision());
--- a/software/source/server/async_interpreter.py
+++ b/software/source/server/async_interpreter.py
@ -11,7 +11,7 @@

 ###
 from pynput import keyboard
-
+from .utils.bytes_to_wav import bytes_to_wav
 from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
 from RealtimeSTT import AudioToTextRecorder
 import time
@ -23,6 +23,7 @@ import os
 class AsyncInterpreter:
    def __init__(self, interpreter):
        self.interpreter = interpreter
+        self.audio_chunks = []

        # STT
        self.stt = AudioToTextRecorder(
@ -73,6 +74,7 @@ class AsyncInterpreter:
        if isinstance(chunk, bytes):
            # It's probably a chunk of audio
            self.stt.feed_audio(chunk)
+            self.audio_chunks.append(chunk)
            # print("INTERPRETER FEEDING AUDIO")

        else:
@ -171,6 +173,12 @@ class AsyncInterpreter:

        message = self.stt.text()

+        if self.audio_chunks:
+            audio_bytes = bytearray(b"".join(self.audio_chunks))
+            wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
+            print("wav_file_path ", wav_file_path)
+            self.audio_chunks = []
+
        print(message)

        # Feed generate to RealtimeTTS
@ -181,7 +189,7 @@ class AsyncInterpreter:
        text_iterator = self.generate(message, start_interpreter)

        self.tts.feed(text_iterator)
-
+        if not self.tts.is_playing():
            self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)

        while True:
--- a/software/source/server/profiles/default.py
+++ b/software/source/server/profiles/default.py
@ -5,7 +5,7 @@ from interpreter import interpreter

 # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "elevenlabs"
+interpreter.tts = "openai"

 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"