Merge pull request #34 from tomchapin/feature/camera-snapshots
Feature/camera snapshots (WIP)
This commit is contained in:
commit
f2e51dd14f
|
@ -167,3 +167,4 @@ cython_debug/
|
||||||
|
|
||||||
# ignore the aifs index files
|
# ignore the aifs index files
|
||||||
_.aifs
|
_.aifs
|
||||||
|
01OS/output_audio.wav
|
||||||
|
|
|
@ -35,6 +35,17 @@ STT_RUNNER=client # If server, audio will be sent over websocket.
|
||||||
# Will expose the server publicly and display that URL.
|
# Will expose the server publicly and display that URL.
|
||||||
SERVER_EXPOSE_PUBLICALLY=False
|
SERVER_EXPOSE_PUBLICALLY=False
|
||||||
|
|
||||||
|
# Image capture settings
|
||||||
|
CAMERA_ENABLED=True
|
||||||
|
|
||||||
|
# Camera device selection (Typically 0 for built-in, 1 for USB)
|
||||||
|
CAMERA_DEVICE_INDEX=0
|
||||||
|
|
||||||
|
# Camera warmup time
|
||||||
|
# This is a workaround for some cameras that don't immediately
|
||||||
|
# return a properly exposed picture when they are first turned on
|
||||||
|
CAMERA_WARMUP_SECONDS=0.4
|
||||||
|
|
||||||
# Debug level
|
# Debug level
|
||||||
# LOG_LEVEL=DEBUG
|
# LOG_LEVEL=DEBUG
|
||||||
LOG_LEVEL="INFO"
|
LOG_LEVEL="INFO"
|
|
@ -1,6 +1,7 @@
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv() # take environment variables from .env.
|
load_dotenv() # take environment variables from .env.
|
||||||
|
|
||||||
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
import threading
|
import threading
|
||||||
import os
|
import os
|
||||||
|
@ -21,6 +22,8 @@ import time
|
||||||
import wave
|
import wave
|
||||||
import tempfile
|
import tempfile
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import cv2
|
||||||
|
import base64
|
||||||
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
|
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
|
||||||
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
|
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
|
||||||
from ..server.utils.kernel import put_kernel_messages_into_queue
|
from ..server.utils.kernel import put_kernel_messages_into_queue
|
||||||
|
@ -44,6 +47,11 @@ RATE = 44100 # Sample rate
|
||||||
RECORDING = False # Flag to control recording state
|
RECORDING = False # Flag to control recording state
|
||||||
SPACEBAR_PRESSED = False # Flag to track spacebar press state
|
SPACEBAR_PRESSED = False # Flag to track spacebar press state
|
||||||
|
|
||||||
|
# Camera configuration
|
||||||
|
# Environment values are strings, and bool() of any non-empty string is True,
# so "False" in .env would still switch the camera on. Parse the text instead;
# an unset variable keeps the camera disabled, as before.
CAMERA_ENABLED = os.getenv('CAMERA_ENABLED', 'False').strip().lower() in ('1', 'true', 'yes', 'on')
|
||||||
|
CAMERA_DEVICE_INDEX = int(os.getenv('CAMERA_DEVICE_INDEX', 0))
|
||||||
|
CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))
|
||||||
|
|
||||||
# Specify OS
|
# Specify OS
|
||||||
current_platform = get_system_info()
|
current_platform = get_system_info()
|
||||||
|
|
||||||
|
@ -54,9 +62,64 @@ send_queue = queue.Queue()
|
||||||
|
|
||||||
class Device:
|
class Device:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.pressed_keys = set()
|
||||||
|
self.captured_images = []
|
||||||
self.audiosegments = []
|
self.audiosegments = []
|
||||||
pass
|
|
||||||
|
|
||||||
|
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
    """Capture one frame from a camera and save it as a PNG in the temp directory.

    On success the saved path is appended to ``self.captured_images``.

    Args:
        camera_index: OpenCV device index to open (defaults to
            CAMERA_DEVICE_INDEX).

    Returns:
        The path of the saved image, or None when no frame could be read
        or the file could not be written.
    """
    cap = cv2.VideoCapture(camera_index)
    try:
        ret, frame = cap.read()  # Capture a single frame to initialize the camera

        if CAMERA_WARMUP_SECONDS > 0:
            # Allow camera to warm up, then snap a picture again.
            # This is a workaround for some cameras that don't return a properly
            # exposed picture immediately when they are first turned on.
            time.sleep(CAMERA_WARMUP_SECONDS)
            ret, frame = cap.read()
    finally:
        # Always hand the device back, even if a read raised.
        cap.release()

    if not ret:
        logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")
        return None

    image_path = os.path.join(
        tempfile.gettempdir(),
        f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png",
    )

    # cv2.imwrite reports failure through its return value; only record the
    # path once the file actually exists so later sends never hit a missing file.
    if not cv2.imwrite(image_path, frame):
        logger.error(f"Error: Couldn't write camera image to {image_path}")
        return None

    self.captured_images.append(image_path)
    logger.info(f"Camera image captured to {image_path}")
    logger.info(f"You now have {len(self.captured_images)} images which will be sent along with your next audio message.")
    return image_path
|
||||||
|
|
||||||
|
|
||||||
|
def encode_image_to_base64(self, image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
||||||
|
|
||||||
|
def add_image_to_send_queue(self, image_path):
    """Put a base64 image message for ``image_path`` on the send queue, then delete the file."""
    send_queue.put({
        "role": "user",
        "type": "image",
        "format": "base64.png",
        "content": self.encode_image_to_base64(image_path),
    })
    # The payload is already in the queue, so the file on disk is no longer needed.
    os.remove(image_path)
|
||||||
|
|
||||||
|
def queue_all_captured_images(self):
    """Queue every pending captured image for sending and empty the pending list.

    An image that cannot be read or removed is logged and skipped, so a single
    bad file neither blocks the remaining images nor stays in the list to fail
    again on the next call.
    """
    # Snapshot and clear up front: the list ends up empty even if an image fails.
    pending = list(self.captured_images)
    self.captured_images.clear()

    for image_path in pending:
        try:
            self.add_image_to_send_queue(image_path)
        except OSError as exc:
            logger.error(f"Couldn't queue captured image {image_path}: {exc}")
|
||||||
|
|
||||||
|
|
||||||
async def play_audiosegments(self):
|
async def play_audiosegments(self):
|
||||||
"""Plays them sequentially."""
|
"""Plays them sequentially."""
|
||||||
while True:
|
while True:
|
||||||
|
@ -112,6 +175,8 @@ class Device:
|
||||||
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
|
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
|
||||||
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
|
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
|
||||||
else:
|
else:
|
||||||
|
self.queue_all_captured_images()
|
||||||
|
|
||||||
if os.getenv('STT_RUNNER') == "client":
|
if os.getenv('STT_RUNNER') == "client":
|
||||||
# Run stt then send text
|
# Run stt then send text
|
||||||
text = stt_wav(wav_path)
|
text = stt_wav(wav_path)
|
||||||
|
@ -142,18 +207,28 @@ class Device:
|
||||||
RECORDING = False
|
RECORDING = False
|
||||||
|
|
||||||
def on_press(self, key):
|
def on_press(self, key):
|
||||||
"""Detect spacebar press."""
|
"""Detect spacebar press, ESC key press, and Ctrl+C combination."""
|
||||||
if key == keyboard.Key.space:
|
self.pressed_keys.add(key) # Add the pressed key to the set
|
||||||
self.toggle_recording(True)
|
|
||||||
|
|
||||||
def on_release(self, key):
|
if keyboard.Key.esc in self.pressed_keys:
|
||||||
"""Detect spacebar release and ESC key press."""
|
|
||||||
if key == keyboard.Key.space:
|
|
||||||
self.toggle_recording(False)
|
|
||||||
elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
|
|
||||||
logger.info("Exiting...")
|
logger.info("Exiting...")
|
||||||
os._exit(0)
|
os._exit(0)
|
||||||
|
elif keyboard.Key.space in self.pressed_keys:
|
||||||
|
self.toggle_recording(True)
|
||||||
|
elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
|
||||||
|
logger.info("Ctrl+C pressed. Exiting...")
|
||||||
|
os._exit(0)
|
||||||
|
|
||||||
|
def on_release(self, key):
    """Handle a key release: stop recording on spacebar, take a camera snapshot on 'c'."""
    # Stop tracking the key as held, whatever it was.
    self.pressed_keys.discard(key)

    if key == keyboard.Key.space:
        self.toggle_recording(False)
        return

    if not CAMERA_ENABLED:
        return

    if key == keyboard.KeyCode.from_char('c'):
        self.fetch_image_from_camera()
|
||||||
|
|
||||||
|
|
||||||
async def message_sender(self, websocket):
|
async def message_sender(self, websocket):
|
||||||
while True:
|
while True:
|
||||||
message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
|
message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
|
||||||
|
@ -168,7 +243,11 @@ class Device:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
async with websockets.connect(WS_URL) as websocket:
|
async with websockets.connect(WS_URL) as websocket:
|
||||||
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
|
if CAMERA_ENABLED:
|
||||||
|
logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press ESC to exit.")
|
||||||
|
else:
|
||||||
|
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
|
||||||
|
|
||||||
asyncio.create_task(self.message_sender(websocket))
|
asyncio.create_task(self.message_sender(websocket))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
DEVICE=$(uname -n)
|
DEVICE=$(uname -n)
|
||||||
if [[ "$DEVICE" == "rpi" ]]; then
|
if [[ "$DEVICE" == "rpi" ]]; then
|
||||||
cd 01OS
|
cd 01OS
|
||||||
python -m 01OS.clients.rpi.device &
|
python -m 01OS.clients.rpi.device
|
||||||
else
|
else
|
||||||
cd 01OS
|
cd 01OS
|
||||||
python -m 01OS.clients.macos.device &
|
python -m 01OS.clients.macos.device
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -1890,6 +1890,31 @@ typing-extensions = ">=4.7,<5"
|
||||||
[package.extras]
|
[package.extras]
|
||||||
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
|
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opencv-python"
|
||||||
|
version = "4.9.0.80"
|
||||||
|
description = "Wrapper package for OpenCV python bindings."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"},
|
||||||
|
{file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
numpy = [
|
||||||
|
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
|
||||||
|
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
|
||||||
|
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
||||||
|
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
||||||
|
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
version = "23.2"
|
version = "23.2"
|
||||||
|
@ -3514,4 +3539,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.9,<3.12"
|
python-versions = ">=3.9,<3.12"
|
||||||
content-hash = "12ccff8a2521e7eb88eee82cfd3de409fea8e1658406d6148a42f9347ca7b2a7"
|
content-hash = "5c8d587b405e97c0dca454078950157106f9aea687cbecce5b7ae7effd2aeece"
|
||||||
|
|
|
@ -25,6 +25,7 @@ pydub = "^0.25.1"
|
||||||
ngrok = "^1.0.0"
|
ngrok = "^1.0.0"
|
||||||
open-interpreter = "^0.2.0"
|
open-interpreter = "^0.2.0"
|
||||||
simpleaudio = "^1.0.4"
|
simpleaudio = "^1.0.4"
|
||||||
|
opencv-python = "^4.9.0.80"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Set python to prioritize the module files from the current directory
|
||||||
|
# If we don't do this, then the python interpreter will not be able to find the modules,
|
||||||
|
# and will throw an error like "ModuleNotFoundError: No module named '01OS'".
|
||||||
|
# If we solve the problem by pip installing the official 01OS package, then those
|
||||||
|
# modules will run instead of the local ones that we are trying to develop with.
|
||||||
|
export PYTHONPATH="$(pwd):$PYTHONPATH"
|
||||||
|
|
||||||
### Import Environment Variables from .env
|
### Import Environment Variables from .env
|
||||||
SCRIPT_DIR="$(dirname "$0")"
|
SCRIPT_DIR="$(dirname "$0")"
|
||||||
if [ ! -f "$SCRIPT_DIR/.env" ]; then
|
if [ ! -f "$SCRIPT_DIR/.env" ]; then
|
||||||
|
|
Loading…
Reference in New Issue