Merge pull request #34 from tomchapin/feature/camera-snapshots
Feature/camera snapshots (WIP)
This commit is contained in:
commit
f2e51dd14f
|
@ -167,3 +167,4 @@ cython_debug/
|
|||
|
||||
# ignore the aifs index files
|
||||
_.aifs
|
||||
01OS/output_audio.wav
|
||||
|
|
|
@ -35,6 +35,17 @@ STT_RUNNER=client # If server, audio will be sent over websocket.
|
|||
# Will expose the server publicly and display that URL.
|
||||
SERVER_EXPOSE_PUBLICALLY=False
|
||||
|
||||
# Image capture settings
|
||||
CAMERA_ENABLED=True
|
||||
|
||||
# Camera device selection (Typically 0 for built-in, 1 for USB)
|
||||
CAMERA_DEVICE_INDEX=0
|
||||
|
||||
# Camera warmup time
|
||||
# This is a workaround for some cameras that don't immediately
|
||||
# return a properly exposed picture when they are first turned on
|
||||
CAMERA_WARMUP_SECONDS=0.4
|
||||
|
||||
# Debug level
|
||||
# LOG_LEVEL=DEBUG
|
||||
LOG_LEVEL="INFO"
|
|
@ -1,6 +1,7 @@
|
|||
from dotenv import load_dotenv
|
||||
load_dotenv() # take environment variables from .env.
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import threading
|
||||
import os
|
||||
|
@ -21,6 +22,8 @@ import time
|
|||
import wave
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
import cv2
|
||||
import base64
|
||||
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
|
||||
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
|
||||
from ..server.utils.kernel import put_kernel_messages_into_queue
|
||||
|
@ -44,6 +47,11 @@ RATE = 44100 # Sample rate
|
|||
RECORDING = False # Flag to control recording state
|
||||
SPACEBAR_PRESSED = False # Flag to track spacebar press state
|
||||
|
||||
# Camera configuration
|
||||
# Parse the flag textually: bool() of any non-empty string (including "False" or "0") is True.
CAMERA_ENABLED = os.getenv('CAMERA_ENABLED', 'False').strip().lower() in ('1', 'true', 'yes', 'on')
|
||||
CAMERA_DEVICE_INDEX = int(os.getenv('CAMERA_DEVICE_INDEX', 0))
|
||||
CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))
|
||||
|
||||
# Specify OS
|
||||
current_platform = get_system_info()
|
||||
|
||||
|
@ -54,9 +62,64 @@ send_queue = queue.Queue()
|
|||
|
||||
class Device:
|
||||
def __init__(self):
|
||||
self.pressed_keys = set()
|
||||
self.captured_images = []
|
||||
self.audiosegments = []
|
||||
pass
|
||||
|
||||
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
    """Capture one frame from a camera and stage it for the next message.

    The frame is written as a PNG into the system temp directory and its path
    is appended to ``self.captured_images`` only after the file has actually
    been written.

    Args:
        camera_index: Device index handed to ``cv2.VideoCapture``.

    Returns:
        The path of the saved image, or ``None`` if no frame could be read or
        the image could not be written.
    """
    cap = cv2.VideoCapture(camera_index)
    try:
        ret, frame = cap.read()  # Capture a single frame to initialize the camera

        if CAMERA_WARMUP_SECONDS > 0:
            # Allow camera to warm up, then snap a picture again.
            # This is a workaround for some cameras that don't return a properly
            # exposed picture immediately when they are first turned on.
            time.sleep(CAMERA_WARMUP_SECONDS)
            ret, frame = cap.read()
    finally:
        # Always free the device, even if reading raised.
        cap.release()

    if not ret:
        logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")
        return None

    temp_dir = tempfile.gettempdir()
    image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")

    # Only record the path once the file exists; a dangling path would make the
    # later encode step fail when it tries to open the file.
    if not cv2.imwrite(image_path, frame):
        logger.error(f"Error: Couldn't write the captured image to {image_path}")
        return None

    self.captured_images.append(image_path)
    logger.info(f"Camera image captured to {image_path}")
    logger.info(f"You now have {len(self.captured_images)} images which will be sent along with your next audio message.")

    return image_path
|
||||
|
||||
|
||||
def encode_image_to_base64(self, image_path):
    """Read the file at image_path and return its bytes as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
||||
|
||||
def add_image_to_send_queue(self, image_path):
    """Put the image at image_path on the send queue as a base64 LMC message, then delete the file."""
    payload = self.encode_image_to_base64(image_path)
    send_queue.put({
        "role": "user",
        "type": "image",
        "format": "base64.png",
        "content": payload,
    })
    # The encoded copy is on the queue now, so the on-disk file is no longer needed.
    os.remove(image_path)
|
||||
|
||||
def queue_all_captured_images(self):
    """Queue every pending captured image for sending, oldest first.

    Each path is removed from ``self.captured_images`` before it is handed to
    ``add_image_to_send_queue``, so the list is drained entry by entry rather
    than cleared at the end. If queuing one image raises, the exception
    propagates, but that entry is already gone and the images after it stay
    in the list for the next call. This prevents a single bad path from
    blocking every later attempt.
    """
    while self.captured_images:
        image_path = self.captured_images.pop(0)
        self.add_image_to_send_queue(image_path)
|
||||
|
||||
|
||||
async def play_audiosegments(self):
|
||||
"""Plays them sequentially."""
|
||||
while True:
|
||||
|
@ -112,6 +175,8 @@ class Device:
|
|||
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
|
||||
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
|
||||
else:
|
||||
self.queue_all_captured_images()
|
||||
|
||||
if os.getenv('STT_RUNNER') == "client":
|
||||
# Run stt then send text
|
||||
text = stt_wav(wav_path)
|
||||
|
@ -142,18 +207,28 @@ class Device:
|
|||
RECORDING = False
|
||||
|
||||
def on_press(self, key):
|
||||
"""Detect spacebar press."""
|
||||
if key == keyboard.Key.space:
|
||||
self.toggle_recording(True)
|
||||
"""Detect spacebar press, ESC key press, and Ctrl+C combination."""
|
||||
self.pressed_keys.add(key) # Add the pressed key to the set
|
||||
|
||||
def on_release(self, key):
|
||||
"""Detect spacebar release and ESC key press."""
|
||||
if key == keyboard.Key.space:
|
||||
self.toggle_recording(False)
|
||||
elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
|
||||
if keyboard.Key.esc in self.pressed_keys:
|
||||
logger.info("Exiting...")
|
||||
os._exit(0)
|
||||
elif keyboard.Key.space in self.pressed_keys:
|
||||
self.toggle_recording(True)
|
||||
elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
|
||||
logger.info("Ctrl+C pressed. Exiting...")
|
||||
os._exit(0)
|
||||
|
||||
def on_release(self, key):
|
||||
"""Detect spacebar release and 'c' key press for camera, and handle key release."""
|
||||
self.pressed_keys.discard(key) # Remove the released key from the key press tracking set
|
||||
|
||||
if key == keyboard.Key.space:
|
||||
self.toggle_recording(False)
|
||||
elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
|
||||
self.fetch_image_from_camera()
|
||||
|
||||
|
||||
async def message_sender(self, websocket):
|
||||
while True:
|
||||
message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
|
||||
|
@ -168,7 +243,11 @@ class Device:
|
|||
while True:
|
||||
try:
|
||||
async with websockets.connect(WS_URL) as websocket:
|
||||
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
|
||||
if CAMERA_ENABLED:
|
||||
logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press ESC to exit.")
|
||||
else:
|
||||
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
|
||||
|
||||
asyncio.create_task(self.message_sender(websocket))
|
||||
|
||||
while True:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
DEVICE=$(uname -n)
|
||||
if [[ "$DEVICE" == "rpi" ]]; then
|
||||
cd 01OS
|
||||
python -m 01OS.clients.rpi.device &
|
||||
python -m 01OS.clients.rpi.device
|
||||
else
|
||||
cd 01OS
|
||||
python -m 01OS.clients.macos.device &
|
||||
python -m 01OS.clients.macos.device
|
||||
fi
|
||||
|
|
|
@ -1890,6 +1890,31 @@ typing-extensions = ">=4.7,<5"
|
|||
[package.extras]
|
||||
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
|
||||
|
||||
[[package]]
|
||||
name = "opencv-python"
|
||||
version = "4.9.0.80"
|
||||
description = "Wrapper package for OpenCV python bindings."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"},
|
||||
{file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
|
||||
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
|
||||
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
||||
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
||||
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "23.2"
|
||||
|
@ -3514,4 +3539,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.9,<3.12"
|
||||
content-hash = "12ccff8a2521e7eb88eee82cfd3de409fea8e1658406d6148a42f9347ca7b2a7"
|
||||
content-hash = "5c8d587b405e97c0dca454078950157106f9aea687cbecce5b7ae7effd2aeece"
|
||||
|
|
|
@ -25,6 +25,7 @@ pydub = "^0.25.1"
|
|||
ngrok = "^1.0.0"
|
||||
open-interpreter = "^0.2.0"
|
||||
simpleaudio = "^1.0.4"
|
||||
opencv-python = "^4.9.0.80"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Set python to prioritize the module files from the current directory
|
||||
# If we don't do this, then the python interpreter will not be able to find the modules,
|
||||
# and will throw an error like "ModuleNotFoundError: No module named '01OS'".
|
||||
# If we solve the problem by pip installing the official 01OS package, then those
|
||||
# modules will run instead of the local ones that we are trying to develop with.
|
||||
export PYTHONPATH="$(pwd):$PYTHONPATH"
|
||||
|
||||
### Import Environment Variables from .env
|
||||
SCRIPT_DIR="$(dirname "$0")"
|
||||
if [ ! -f "$SCRIPT_DIR/.env" ]; then
|
||||
|
|
Loading…
Reference in New Issue