send wav files

2024-04-24 17:59:41 -07:00 · 2024-04-24 17:59:41 -07:00 · 0602348f1c
parent f673744f1b
commit 0602348f1c
4 changed files with 27 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,6 @@ cython_debug/
 _.aifs
 software/output_audio.wav
 .DS_Store
 node_modules/
 .expo/
--- a/software/source/server/server.py
+++ b/software/source/server/server.py
@@ -21,8 +21,7 @@ from ..utils.accumulator import Accumulator
 from .utils.logs import setup_logging
 from .utils.logs import logger
 import base64
-from google.cloud import storage
+import shutil
 from ..utils.print_markdown import print_markdown
 os.environ["STT_RUNNER"] = "server"
@@ -394,31 +393,31 @@ def stream_tts(sentence):
    with open(audio_file, "rb") as f:
        audio_bytes = f.read()
    desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
    desktop_audio_file = os.path.join(desktop_path, os.path.basename(audio_file))
    shutil.copy(audio_file, desktop_audio_file)
    print(f"Audio file saved to Desktop: {desktop_audio_file}")
    # storage_client = storage.Client(project="react-native-421323")
    # bucket = storage_client.bucket("01-audio")
    # blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    # generation_match_precondition = 0
-    storage_client = storage.Client(project="react-native-421323")
+    # blob.upload_from_filename(
-    bucket = storage_client.bucket("01-audio")
+    #     audio_file, if_generation_match=generation_match_precondition
-    blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+    # )
-    generation_match_precondition = 0
+    # print(
-
+    #     f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
-    blob.upload_from_filename(
+    # )
        audio_file, if_generation_match=generation_match_precondition
    )
    print(
        f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
    )
    os.remove(audio_file)
-    file_type = "bytes.raw"
+    file_type = "audio/wav"
-    chunk_size = 1024
+    # Read the entire WAV file
-
+    with open(audio_file, "rb") as f:
-    # Stream the audio
+        audio_bytes = f.read()
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for i in range(0, len(audio_bytes), chunk_size):
        chunk = audio_bytes[i : i + chunk_size]
        yield chunk
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
    # Stream the audio as a single message
    yield {"role": "assistant", "type": "audio", "format": file_type, "content": base64.b64encode(audio_bytes).decode('utf-8'), "start": True, "end": True}
 from uvicorn import Config, Server
 import os
--- a/software/source/server/services/tts/openai/tts.py
+++ b/software/source/server/services/tts/openai/tts.py
@@ -36,9 +36,9 @@ class Tts:
            response.stream_to_file(temp_file.name)
            # TODO: hack to format audio correctly for device
-            outfile = tempfile.gettempdir() + "/" + "raw.dat"
+            outfile = tempfile.gettempdir() + "/" + "output.wav"
            ffmpeg.input(temp_file.name).output(
-                outfile, f="s16le", ar="16000", ac="1", loglevel="panic"
+                outfile, f="wav", ar="16000", ac="1", loglevel="panic"
            ).run()
            return outfile
--- a/software/source/server/tunnel.py
+++ b/software/source/server/tunnel.py
@@ -100,7 +100,7 @@ def create_tunnel(
        # If ngrok is installed, start it on the specified port
        # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE)
        process = subprocess.Popen(
-            f"ngrok http {server_port} --scheme http,https --domain=sterling-snail-conversely.ngrok-free.app --log=stdout",
+            f"ngrok http {server_port} --scheme http,https  --log=stdout",
            shell=True,
            stdout=subprocess.PIPE,
        )