Merge pull request #28 from shivenmian/u/shivenmian/local
fix: auto-download STT + fixed breaking change where keyboard code unreachable
This commit is contained in:
commit
b31623cf1b
|
@ -2,15 +2,17 @@
|
||||||
# Copy this file and rename it to ".env" to use it.
|
# Copy this file and rename it to ".env" to use it.
|
||||||
|
|
||||||
# If ALL_LOCAL is False, we'll use OpenAI's services
|
# If ALL_LOCAL is False, we'll use OpenAI's services
|
||||||
# If setting ALL_LOCAL to true, set the path to the WHISPER local model
|
# If ALL_LOCAL is True, we use the local whisper.cpp and Piper models instead
|
||||||
ALL_LOCAL=False
|
ALL_LOCAL=False
|
||||||
# WHISPER_MODEL_PATH=...
|
WHISPER_MODEL_NAME="ggml-tiny.en.bin"
|
||||||
|
|
||||||
|
# Uncomment and set the OpenAI API key for OpenInterpreter to work
|
||||||
# OPENAI_API_KEY=sk-...
|
# OPENAI_API_KEY=sk-...
|
||||||
|
|
||||||
# For TTS, we use the en_US-lessac-medium voice model by default
|
# For TTS, we use the en_US-lessac-medium voice model by default
|
||||||
# Please change the voice URL and voice name if you wish to use another voice
|
# Please change the voice URL and voice name if you wish to use another voice
|
||||||
export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
|
PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
|
||||||
export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
|
PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
|
||||||
|
|
||||||
# If SERVER_START, this is where we'll serve the server.
|
# If SERVER_START, this is where we'll serve the server.
|
||||||
# If DEVICE_START, this is where the device expects the server to be.
|
# If DEVICE_START, this is where the device expects the server to be.
|
||||||
|
|
|
@ -230,10 +230,10 @@ if __name__ == "__main__":
|
||||||
toggle_recording(True)
|
toggle_recording(True)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Keyboard listener for spacebar press/release
|
# Keyboard listener for spacebar press/release
|
||||||
listener = keyboard.Listener(on_press=on_press, on_release=on_release)
|
listener = keyboard.Listener(on_press=on_press, on_release=on_release)
|
||||||
listener.start()
|
listener.start()
|
||||||
|
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
p.terminate()
|
p.terminate()
|
|
@ -9,9 +9,10 @@ set -a; source .env; set +a
|
||||||
|
|
||||||
### SETUP
|
### SETUP
|
||||||
|
|
||||||
# if using local models, install the models / executables
|
|
||||||
|
|
||||||
if [[ "$ALL_LOCAL" == "True" ]]; then
|
if [[ "$ALL_LOCAL" == "True" ]]; then
|
||||||
|
# if using local models, install the models / executables
|
||||||
|
WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
|
||||||
|
WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
|
||||||
curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
|
curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
|
||||||
OS=$(uname -s)
|
OS=$(uname -s)
|
||||||
ARCH=$(uname -m)
|
ARCH=$(uname -m)
|
||||||
|
|
11
OS/01/stt.py
11
OS/01/stt.py
|
@ -56,13 +56,14 @@ def run_command(command):
|
||||||
return result.stdout, result.stderr
|
return result.stdout, result.stderr
|
||||||
|
|
||||||
def get_transcription_file(wav_file_path: str):
|
def get_transcription_file(wav_file_path: str):
|
||||||
model_path = os.getenv("WHISPER_MODEL_PATH")
|
whisper_rust_path = os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust')
|
||||||
if not model_path:
|
model_name = os.getenv('WHISPER_MODEL_NAME')
|
||||||
raise EnvironmentError("WHISPER_MODEL_PATH environment variable is not set.")
|
if not model_name:
|
||||||
|
raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
|
||||||
|
|
||||||
output, error = run_command([
|
output, error = run_command([
|
||||||
os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust', 'whisper-rust'),
|
os.path.join(whisper_rust_path, 'whisper-rust'),
|
||||||
'--model-path', model_path,
|
'--model-path', os.path.join(whisper_rust_path, model_name),
|
||||||
'--file-path', wav_file_path
|
'--file-path', wav_file_path
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
|
@ -27,8 +27,6 @@ python -m pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue.
|
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue.
|
||||||
|
|
||||||
If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model.
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -36,6 +34,7 @@ cd OS/01
|
||||||
bash start.sh
|
bash start.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you want to run local text-to-speech and speech-to-text, set `ALL_LOCAL=True` (exactly this capitalization) in your `.env` file, which `start.sh` sources on startup. This will use the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [Piper](https://github.com/rhasspy/piper) models.
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
## Background
|
## Background
|
||||||
|
|
Loading…
Reference in New Issue