add mobile flag
This commit is contained in:
parent
2e0ab15e5b
commit
3dea99470a
|
@ -182,7 +182,7 @@ const Main: React.FC<MainProps> = ({ route }) => {
|
|||
try {
|
||||
const message = JSON.parse(e.data);
|
||||
|
||||
if (message.content && typeof message.content === "string") {
|
||||
if (message.content && message.type === "audio") {
|
||||
console.log("✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅ Audio message");
|
||||
|
||||
const buffer = message.content;
|
||||
|
|
|
@ -39,6 +39,8 @@ print("")
|
|||
|
||||
setup_logging()
|
||||
|
||||
accumulator_global = Accumulator()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app_dir = user_data_dir("01")
|
||||
|
@ -196,26 +198,11 @@ async def send_messages(websocket: WebSocket):
|
|||
|
||||
try:
|
||||
if isinstance(message, dict):
|
||||
# print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
await websocket.send_json(message)
|
||||
elif isinstance(message, bytes):
|
||||
message = base64.b64encode(message)
|
||||
# print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
await websocket.send_bytes(message)
|
||||
|
||||
"""
|
||||
str_bytes = str(message)
|
||||
json_bytes = {
|
||||
"role": "assistant",
|
||||
"type": "audio",
|
||||
"format": "message",
|
||||
"content": str_bytes,
|
||||
}
|
||||
print(
|
||||
f"Sending to the device: {type(json_bytes)} {str(json_bytes)[:100]}"
|
||||
)
|
||||
await websocket.send_json(json_bytes)
|
||||
"""
|
||||
else:
|
||||
raise TypeError("Message must be a dict or bytes")
|
||||
except:
|
||||
|
@ -224,10 +211,11 @@ async def send_messages(websocket: WebSocket):
|
|||
raise
|
||||
|
||||
|
||||
async def listener():
|
||||
async def listener(mobile: bool):
|
||||
while True:
|
||||
try:
|
||||
accumulator = Accumulator()
|
||||
if mobile:
|
||||
accumulator_mobile = Accumulator()
|
||||
|
||||
while True:
|
||||
if not from_user.empty():
|
||||
|
@ -238,7 +226,11 @@ async def listener():
|
|||
break
|
||||
await asyncio.sleep(1)
|
||||
|
||||
message = accumulator.accumulate(chunk)
|
||||
if mobile:
|
||||
message = accumulator_mobile.accumulate(chunk, mobile)
|
||||
else:
|
||||
message = accumulator_global.accumulate(chunk, mobile)
|
||||
|
||||
if message == None:
|
||||
# Will be None until we have a full message ready
|
||||
continue
|
||||
|
@ -305,8 +297,9 @@ async def listener():
|
|||
logger.debug("Got chunk:", chunk)
|
||||
|
||||
# Send it to the user
|
||||
# await to_device.put(chunk)
|
||||
# Yield to the event loop, so you actually send it out
|
||||
await to_device.put(chunk)
|
||||
|
||||
# Yield to the event loop, so you actxually send it out
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
if os.getenv("TTS_RUNNER") == "server":
|
||||
|
@ -328,11 +321,11 @@ async def listener():
|
|||
|
||||
if is_full_sentence(sentences[-1]):
|
||||
for sentence in sentences:
|
||||
await stream_tts_to_device(sentence)
|
||||
await stream_tts_to_device(sentence, mobile)
|
||||
accumulated_text = ""
|
||||
else:
|
||||
for sentence in sentences[:-1]:
|
||||
await stream_tts_to_device(sentence)
|
||||
await stream_tts_to_device(sentence, mobile)
|
||||
accumulated_text = sentences[-1]
|
||||
|
||||
# If we're going to speak, say we're going to stop sending text.
|
||||
|
@ -376,7 +369,7 @@ async def listener():
|
|||
traceback.print_exc()
|
||||
|
||||
|
||||
async def stream_tts_to_device(sentence):
|
||||
async def stream_tts_to_device(sentence, mobile: bool):
|
||||
force_task_completion_responses = [
|
||||
"the task is done",
|
||||
"the task is impossible",
|
||||
|
@ -385,49 +378,44 @@ async def stream_tts_to_device(sentence):
|
|||
if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
|
||||
return
|
||||
|
||||
for chunk in stream_tts(sentence):
|
||||
for chunk in stream_tts(sentence, mobile):
|
||||
await to_device.put(chunk)
|
||||
|
||||
|
||||
def stream_tts(sentence):
|
||||
audio_file = tts(sentence)
|
||||
def stream_tts(sentence, mobile: bool):
|
||||
audio_file = tts(sentence, mobile)
|
||||
|
||||
with open(audio_file, "rb") as f:
|
||||
audio_bytes = f.read()
|
||||
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
|
||||
desktop_audio_file = os.path.join(
|
||||
desktop_path, f"{datetime.datetime.now()}" + os.path.basename(audio_file)
|
||||
)
|
||||
shutil.copy(audio_file, desktop_audio_file)
|
||||
print(f"Audio file saved to Desktop: {desktop_audio_file}")
|
||||
# storage_client = storage.Client(project="react-native-421323")
|
||||
# bucket = storage_client.bucket("01-audio")
|
||||
# blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
|
||||
# generation_match_precondition = 0
|
||||
|
||||
# blob.upload_from_filename(
|
||||
# audio_file, if_generation_match=generation_match_precondition
|
||||
# )
|
||||
# print(
|
||||
# f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
|
||||
# )
|
||||
|
||||
file_type = "audio/wav"
|
||||
# Read the entire WAV file
|
||||
with open(audio_file, "rb") as f:
|
||||
audio_bytes = f.read()
|
||||
|
||||
os.remove(audio_file)
|
||||
if mobile:
|
||||
file_type = "audio/wav"
|
||||
|
||||
# Stream the audio as a single message
|
||||
yield {
|
||||
"role": "assistant",
|
||||
"type": "audio",
|
||||
"format": file_type,
|
||||
"content": base64.b64encode(audio_bytes).decode("utf-8"),
|
||||
"start": True,
|
||||
"end": True,
|
||||
}
|
||||
os.remove(audio_file)
|
||||
|
||||
# stream the audio as a single sentence
|
||||
yield {
|
||||
"role": "assistant",
|
||||
"type": "audio",
|
||||
"format": file_type,
|
||||
"content": base64.b64encode(audio_bytes).decode("utf-8"),
|
||||
"start": True,
|
||||
"end": True,
|
||||
}
|
||||
|
||||
else:
|
||||
# stream the audio in chunk sizes
|
||||
os.remove(audio_file)
|
||||
|
||||
file_type = "bytes.raw"
|
||||
chunk_size = 1024
|
||||
|
||||
yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
|
||||
for i in range(0, len(audio_bytes), chunk_size):
|
||||
chunk = audio_bytes[i : i + chunk_size]
|
||||
yield chunk
|
||||
yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
|
||||
|
||||
|
||||
from uvicorn import Config, Server
|
||||
|
@ -464,6 +452,7 @@ async def main(
|
|||
temperature,
|
||||
tts_service,
|
||||
stt_service,
|
||||
mobile,
|
||||
):
|
||||
global HOST
|
||||
global PORT
|
||||
|
@ -515,7 +504,7 @@ async def main(
|
|||
interpreter.llm.completions = llm
|
||||
|
||||
# Start listening
|
||||
asyncio.create_task(listener())
|
||||
asyncio.create_task(listener(mobile))
|
||||
|
||||
# Start watching the kernel if it's your job to do that
|
||||
if True: # in the future, code can run on device. for now, just server.
|
||||
|
|
|
@ -25,7 +25,7 @@ class Tts:
|
|||
def __init__(self, config):
|
||||
pass
|
||||
|
||||
def tts(self, text):
|
||||
def tts(self, text, mobile):
|
||||
response = client.audio.speech.create(
|
||||
model="tts-1",
|
||||
voice=os.getenv("OPENAI_VOICE_NAME", "alloy"),
|
||||
|
@ -36,9 +36,15 @@ class Tts:
|
|||
response.stream_to_file(temp_file.name)
|
||||
|
||||
# TODO: hack to format audio correctly for device
|
||||
outfile = tempfile.gettempdir() + "/" + "output.wav"
|
||||
ffmpeg.input(temp_file.name).output(
|
||||
outfile, f="wav", ar="16000", ac="1", loglevel="panic"
|
||||
).run()
|
||||
if mobile:
|
||||
outfile = tempfile.gettempdir() + "/" + "output.wav"
|
||||
ffmpeg.input(temp_file.name).output(
|
||||
outfile, f="wav", ar="16000", ac="1", loglevel="panic"
|
||||
).run()
|
||||
else:
|
||||
outfile = tempfile.gettempdir() + "/" + "raw.dat"
|
||||
ffmpeg.input(temp_file.name).output(
|
||||
outfile, f="s16le", ar="16000", ac="1", loglevel="panic"
|
||||
).run()
|
||||
|
||||
return outfile
|
||||
|
|
|
@ -3,7 +3,7 @@ class Accumulator:
|
|||
self.template = {"role": None, "type": None, "format": None, "content": None}
|
||||
self.message = self.template
|
||||
|
||||
def accumulate(self, chunk):
|
||||
def accumulate(self, chunk, mobile):
|
||||
# print(str(chunk)[:100])
|
||||
if type(chunk) == dict:
|
||||
if "format" in chunk and chunk["format"] == "active_line":
|
||||
|
@ -44,6 +44,10 @@ class Accumulator:
|
|||
if "content" not in self.message or type(self.message["content"]) != bytes:
|
||||
self.message["content"] = b""
|
||||
self.message["content"] += chunk
|
||||
self.message["type"] = "audio"
|
||||
self.message["format"] = "bytes.wav"
|
||||
return self.message
|
||||
|
||||
if mobile:
|
||||
self.message["type"] = "audio"
|
||||
self.message["format"] = "bytes.wav"
|
||||
return self.message
|
||||
else:
|
||||
return None
|
||||
|
|
|
@ -72,13 +72,16 @@ def run(
|
|||
False, "--local", help="Use recommended local services for LLM, STT, and TTS"
|
||||
),
|
||||
qr: bool = typer.Option(False, "--qr", help="Print the QR code for the server URL"),
|
||||
mobile: bool = typer.Option(
|
||||
False, "--mobile", help="Toggle server to support mobile app"
|
||||
),
|
||||
):
|
||||
_run(
|
||||
server=server,
|
||||
server=server or mobile,
|
||||
server_host=server_host,
|
||||
server_port=server_port,
|
||||
tunnel_service=tunnel_service,
|
||||
expose=expose,
|
||||
expose=expose or mobile,
|
||||
client=client,
|
||||
server_url=server_url,
|
||||
client_type=client_type,
|
||||
|
@ -92,7 +95,8 @@ def run(
|
|||
tts_service=tts_service,
|
||||
stt_service=stt_service,
|
||||
local=local,
|
||||
qr=qr,
|
||||
qr=qr or mobile,
|
||||
mobile=mobile,
|
||||
)
|
||||
|
||||
|
||||
|
@ -116,6 +120,7 @@ def _run(
|
|||
stt_service: str = "openai",
|
||||
local: bool = False,
|
||||
qr: bool = False,
|
||||
mobile: bool = False,
|
||||
):
|
||||
if local:
|
||||
tts_service = "piper"
|
||||
|
@ -136,6 +141,7 @@ def _run(
|
|||
signal.signal(signal.SIGINT, handle_exit)
|
||||
|
||||
if server:
|
||||
print(f"Starting server with mobile = {mobile}")
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
server_thread = threading.Thread(
|
||||
|
@ -153,6 +159,7 @@ def _run(
|
|||
temperature,
|
||||
tts_service,
|
||||
stt_service,
|
||||
mobile,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue