feat: added local piper TTS
This commit is contained in:
parent
65acb1163f
commit
7582c8ad02
|
@ -1,5 +1,5 @@
|
|||
ggml-*.bin
|
||||
|
||||
OS/01/local_tts/*
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
|
|
@ -1 +1,12 @@
|
|||
[{"role": "user", "type": "message", "content": " Hey, how you doing?\n"}]
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
"content": " Hello, how are you doing?\n"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"type": "message",
|
||||
"content": "I'm an artificial intelligence, so I don't have feelings, but thank you for asking. How may I assist you today?"
|
||||
}
|
||||
]
|
|
@ -6,6 +6,11 @@ export ALL_LOCAL=False
|
|||
# export WHISPER_MODEL_PATH=...
|
||||
# export OPENAI_API_KEY=sk-...
|
||||
|
||||
# For TTS, we use the en_US-lessac-medium voice model by default
|
||||
# Please change the voice URL and voice name if you wish to use another voice
|
||||
export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
|
||||
export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
|
||||
|
||||
# If SERVER_START, this is where we'll serve the server.
|
||||
# If DEVICE_START, this is where the device expects the server to be.
|
||||
export SERVER_URL=ws://localhost:8000/
|
||||
|
@ -22,6 +27,46 @@ export SERVER_EXPOSE_PUBLICALLY=False
|
|||
|
||||
### SETUP
|
||||
|
||||
# if using local models, install the models / executables
|
||||
if [[ "$ALL_LOCAL" == "True" ]]; then
    # Install the piper TTS executable and the configured voice model into
    # ./local_tts/piper (relative to the directory this script runs from).

    # Translate uname output into the <os>/<arch> tokens used in the piper
    # release asset names (piper_<os>_<arch>.tar.gz).
    OS=$(uname -s)
    ARCH=$(uname -m)
    PIPER_SUPPORTED=true
    if [ "$OS" = "Darwin" ]; then
        OS="macos"
        if [ "$ARCH" = "arm64" ]; then
            ARCH="aarch64"
        elif [ "$ARCH" = "x86_64" ]; then
            ARCH="x64"
        else
            echo "Piper: unsupported architecture" >&2
            # Do not attempt to download an asset that cannot exist.
            PIPER_SUPPORTED=false
        fi
    elif [ "$OS" = "Linux" ]; then
        # Release asset names are lowercase and download URLs are
        # case-sensitive, so "Linux" from uname must be normalised.
        OS="linux"
    fi

    PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
    PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

    if [ "$PIPER_SUPPORTED" = "true" ]; then
        # Remember the starting directory so we always return to it, even if
        # one of the cd calls below fails.
        PIPER_START_DIR=$(pwd)

        # -p: do not fail when the directory is left over from an earlier run.
        mkdir -p local_tts
        cd local_tts
        # -f: fail on HTTP errors instead of saving the error page as the archive.
        curl -fOL "${PIPER_URL}${PIPER_ASSETNAME}"
        tar -xvzf "$PIPER_ASSETNAME"
        cd piper

        # The voice model and its JSON config are required on every platform:
        # tts.py passes local_tts/piper/$PIPER_VOICE_NAME to piper as --model.
        curl -fOL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
        curl -fOL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"

        if [ "$OS" = "macos" ]; then
            if [ "$ARCH" = "x64" ]; then
                # NOTE(review): Rosetta is normally only relevant on Apple
                # Silicon; confirm this condition targets the intended machines.
                softwareupdate --install-rosetta --agree-to-license
            fi
            PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
            PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"

            curl -fOL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
            tar -xvzf "$PIPER_PHONEMIZE_ASSETNAME"

            PIPER_DIR=$(pwd)
            # Rewrite the @rpath library references inside the piper binary to
            # the absolute paths of the dylibs unpacked from piper-phonemize.
            install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
        fi

        cd "$PIPER_START_DIR"
    fi
fi
|
||||
|
||||
# (for dev, reset the ports we were using)
|
||||
|
||||
# Extract only the port (the digits that directly follow a ':'), so hosts
# that contain digits, e.g. ws://192.168.1.5:8000/, still yield just "8000".
SERVER_PORT=$(echo "$SERVER_URL" | grep -oE ':[0-9]+' | grep -oE '[0-9]+' | tail -n 1)
|
||||
|
|
48
OS/01/tts.py
48
OS/01/tts.py
|
@ -7,20 +7,42 @@ from openai import OpenAI
|
|||
from pydub import AudioSegment
|
||||
from pydub.playback import play
|
||||
from playsound import playsound
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
client = OpenAI()
|
||||
|
||||
def run_command(command):
    """Print *command*, execute it, and return the finished process.

    Both stdout and stderr are captured and decoded as text; the returned
    ``subprocess.CompletedProcess`` exposes them as ``.stdout`` / ``.stderr``.
    The exit status is not checked here.
    """
    print(command)
    # capture_output=True is shorthand for piping both stdout and stderr.
    return subprocess.run(command, capture_output=True, text=True)
|
||||
|
||||
def tts(text, play_audio):
    """Convert *text* to speech and return the generated audio file's bytes.

    When the ``ALL_LOCAL`` environment variable is exactly ``'False'`` the
    OpenAI speech endpoint is used and MP3 data is produced. For any other
    value (including unset) the piper executable in ``local_tts/piper`` next
    to this file is run and WAV data is produced.

    Args:
        text: The text to synthesize.
        play_audio: When truthy, play the audio with ``playsound`` before
            returning.

    Returns:
        bytes: The contents of the generated audio file.

    Raises:
        RuntimeError: If the piper process exits with a non-zero status.
    """
    use_openai = os.getenv('ALL_LOCAL') == 'False'
    suffix = ".mp3" if use_openai else ".wav"

    # Write into a private temporary directory rather than a held-open
    # NamedTemporaryFile: the writer (OpenAI client or piper) reopens the path
    # itself, and the directory is removed on exit so no audio file leaks.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "speech" + suffix)

        if use_openai:
            response = client.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input=text,
                response_format="mp3"
            )
            response.stream_to_file(audio_path)
        else:
            piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')
            # Fall back to the voice the start script documents as the default
            # instead of failing with a TypeError when the variable is unset.
            voice_name = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')
            completed = subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, voice_name),
                '--output_file', audio_path
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            # Surface piper failures with its stderr instead of silently
            # returning empty or missing audio.
            if completed.returncode != 0:
                raise RuntimeError(
                    f"piper exited with status {completed.returncode}: "
                    f"{completed.stderr.strip()}"
                )

        if play_audio:
            playsound(audio_path)

        with open(audio_path, "rb") as audio_file:
            return audio_file.read()
|
||||
|
|
Loading…
Reference in New Issue