Merge branch 'main' of https://github.com/OpenInterpreter/01 into react-native-app

2024-05-06 12:43:05 -04:00 · 2024-05-06 12:43:05 -04:00 · 4994132c19
parent 2f594dd825 81d53b79f4
commit 4994132c19
13 changed files with 170 additions and 12 deletions
--- a/README.md
+++ b/README.md
@ -8,6 +8,13 @@
    <br><a href="https://openinterpreter.com/01">Preorder the Light</a>‎ ‎ |‎ ‎ <a href="https://changes.openinterpreter.com">Get Updates</a>‎ ‎ |‎ ‎ <a href="https://01.openinterpreter.com/">Documentation</a><br>
 </p>
 <div align="center">
 | [日本語](docs/README_JP.md) | [English](README.md) |
 </div>
 <br>
 ![OI-O1-BannerDemo-2](https://www.openinterpreter.com/OI-O1-BannerDemo-3.jpg)
--- a/docs/video_documentation/collection.md
+++ b/docs/video_documentation/collection.md
@ -0,0 +1,83 @@
 ## For End Users
 [Announcement video](https://www.youtube.com/watch?v=jWr-WeXAdeI)
 [Wes Roth](https://www.youtube.com/@WesRoth)
 <details>
 <summary>Details</summary>
 No technical coverage
 </details>
 ---
 [Announcement video](https://www.youtube.com/watch?v=JaBFT3fF2fk)
 [TheAIGRID](https://www.youtube.com/@TheAiGrid)
 <details>
 <summary>Details</summary>
 [here](https://youtu.be/JaBFT3fF2fk?si=8zPGO-U6WdLNnISw&t=656)
 Mentions the current lack of Windows support.
 </details>
 ---
 [Announcement video](https://www.youtube.com/watch?v=Q_p82HtBqoc)
 [Matt Berman](https://www.youtube.com/@matthew_berman)
 <details>
 <summary>Details</summary>
 [here](https://youtu.be/Q_p82HtBqoc?si=aAxjWZnBdwBbaOUr&t=579)
 Berman shows an install of 01 using conda and Python 3.9
 on what looks like Linux, and shows how to get OpenAI keys.
 </details>
 ---
 [Announcement video](https://www.youtube.com/watch?v=q0dJ7T7au2Y)
 [WorldofAI](https://www.youtube.com/@intheworldofai)
 <details>
 <summary>Details</summary>
 <!-- Add details here -->
 </details>
 ---
 [Breakdown video](https://www.youtube.com/watch?v=W-VwN0n4d9Y)
 [Mervin Praison](https://www.youtube.com/@MervinPraison)
 <details>
 <summary>Details</summary>
 - Uses conda to install 01 with Python 3.11 on Linux (possibly macOS)
 - 0:00 Introduction to Open Interpreter
 - 0:47 Creating Apps and Summarizing Documents
 - 1:20 Image Modifications and Game Creation
 - 2:55 Exploratory Data Analysis and Charting
 - 4:00 Server Log Analysis
 - 5:01 Image and Video Editing
 - 6:00 Composing Music with AI
 - 7:18 Calendar Management and Email Automation
 - 9:01 Integrating with Fast API and LM Studio
 </details>
 ---
 [Breakdown video](https://www.youtube.com/watch?v=uyfoHQVgeY0)
 [Gary Explains](https://www.youtube.com/@GaryExplains)
 <br>for **open interpreter** not **01**
 <details>
 <summary>Details</summary>
 - 3:45 states that it will run on macOS, Linux, and Windows, and requires Python 3.10
 </details>
 ## For Developers
 <BR>
 Coming soon
--- a/hardware/light/README.md
+++ b/hardware/light/README.md
@ -11,6 +11,7 @@ To set up audio recording + playback on the ESP32 (M5 Atom), do the following:
 - M5Atom by M5Stack [Reference](https://www.arduino.cc/reference/en/libraries/m5atom/)
 - WebSockets by Markus Sattler [Reference](https://www.arduino.cc/reference/en/libraries/websockets/)
 - AsyncTCP by dvarrel [Reference](https://github.com/dvarrel/AsyncTCP)
 - ESPAsyncWebServer by lacamera [Reference](https://github.com/lacamera/ESPAsyncWebServer)
 Finally, to flash the .ino to the board, connect the board to the USB port, select the port from the dropdown on the IDE, then select the M5Atom board (or M5Stack-ATOM if you have that). Click on upload to flash the board.
--- a/hardware/light/bodies/v1.2/01,
+++ b/hardware/light/bodies/v1.2/01,
--- a/hardware/light/bodies/v1.2/Backing
+++ b/hardware/light/bodies/v1.2/Backing
--- a/hardware/light/bodies/v1.2/Bottom
+++ b/hardware/light/bodies/v1.2/Bottom
--- a/hardware/light/bodies/v1.2/Top
+++ b/hardware/light/bodies/v1.2/Top
--- a/software/source/clients/base_device.py
+++ b/software/source/clients/base_device.py
@ -3,6 +3,7 @@ from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 import os
 import sys
 import asyncio
 import threading
 import pyaudio
@ -58,7 +59,16 @@ CAMERA_WARMUP_SECONDS = float(os.getenv("CAMERA_WARMUP_SECONDS", 0))
 # Specify OS
 current_platform = get_system_info()
-is_win10 = lambda: platform.system() == "Windows" and "10" in platform.version()
+
 def is_win11():
    """Return True when the Windows build number is 22000 or higher.

    Build 22000 is the threshold this module uses to recognise Windows 11.
    `sys.getwindowsversion` only exists on Windows, so on every other
    platform this returns False instead of raising AttributeError.
    """
    get_windows_version = getattr(sys, "getwindowsversion", None)
    if get_windows_version is None:
        # Not running on Windows: there is no Windows build to inspect.
        return False
    return get_windows_version().build >= 22000
 def is_win10():
    """Return True when running on Windows 10 (and not Windows 11).

    The check requires `platform.system()` to report "Windows", the string
    "10" to appear in `platform.version()`, and `is_win11()` to be False.
    Detection is best-effort: any ordinary error during the probe yields
    False rather than propagating.
    """
    try:
        return platform.system() == "Windows" and "10" in platform.version() and not is_win11()
    except Exception:
        # Only swallow ordinary errors; KeyboardInterrupt and SystemExit
        # must still propagate so the process can be stopped.
        return False
 # Initialize PyAudio
 p = pyaudio.PyAudio()
@ -72,6 +82,7 @@ class Device:
        self.captured_images = []
        self.audiosegments = []
        self.server_url = ""
        self.ctrl_pressed = False
    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -256,23 +267,39 @@ class Device:
    def on_press(self, key):
        """Detect spacebar press and Ctrl+C combination."""
        self.pressed_keys.add(key)  # Add the pressed key to the set
        if keyboard.Key.space in self.pressed_keys:
            self.toggle_recording(True)
-        elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char("c")} <= self.pressed_keys:
+        elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
            logger.info("Ctrl+C pressed. Exiting...")
            kill_process_tree()
            os._exit(0)
        # Windows alternative to the above
        if key == keyboard.Key.ctrl_l:
            self.ctrl_pressed = True
        try:
            if key.vk == 67 and self.ctrl_pressed:
                logger.info("Ctrl+C pressed. Exiting...")
                kill_process_tree()
                os._exit(0)
        # For non-character keys
        except:
            pass
    def on_release(self, key):
        """Detect spacebar release and 'c' key press for camera, and handle key release."""
-        self.pressed_keys.discard(
+        self.pressed_keys.discard(key)  # Remove the released key from the key press tracking set
            key
        )  # Remove the released key from the key press tracking set
        if key == keyboard.Key.ctrl_l:
            self.ctrl_pressed = False
        if key == keyboard.Key.space:
            self.toggle_recording(False)
-        elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char("c"):
+        elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
            self.fetch_image_from_camera()
    async def message_sender(self, websocket):
@ -342,7 +369,7 @@ class Device:
                        code = message["content"]
                        result = interpreter.computer.run(language, code)
                        send_queue.put(result)
-
+                        
        if is_win10():
            logger.info("Windows 10 detected")
            # Workaround for Windows 10 not latching to the websocket server.
--- a/software/source/server/server.py
+++ b/software/source/server/server.py
@ -355,7 +355,7 @@ async def listener(mobile: bool):
                        json.dump(interpreter.messages, file, indent=4)
                    # TODO: is triggering seemingly randomly
-                    # logger.info("New user message recieved. Breaking.")
+                    # logger.info("New user message received. Breaking.")
                    # break
                # Also check if there's any new computer messages
@ -363,7 +363,7 @@ async def listener(mobile: bool):
                    with open(conversation_history_path, "w") as file:
                        json.dump(interpreter.messages, file, indent=4)
-                    logger.info("New computer message recieved. Breaking.")
+                    logger.info("New computer message received. Breaking.")
                    break
        except:
            traceback.print_exc()
--- a/software/source/server/services/stt/local-whisper/stt.py
+++ b/software/source/server/services/stt/local-whisper/stt.py
@ -156,7 +156,7 @@ def stt_wav(service_directory, wav_file_path: str):
        temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
    )
    ffmpeg.input(wav_file_path).output(
-        output_path, acodec="pcm_s16le", ac=1, ar="16k"
+        output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"
    ).run()
    try:
        transcript = get_transcription_file(service_directory, output_path)
--- a/software/source/server/utils/kernel.py
+++ b/software/source/server/utils/kernel.py
@ -5,12 +5,17 @@ load_dotenv()  # take environment variables from .env.
 import asyncio
 import subprocess
 import platform
 import os
 import shutil
 from .logs import setup_logging
 from .logs import logger
 setup_logging()
 # dmesg process created at boot time
 dmesg_proc = None
 def get_kernel_messages():
    """
@ -25,12 +30,37 @@ def get_kernel_messages():
        output, _ = process.communicate()
        return output.decode("utf-8")
    elif current_platform == "Linux":
-        with open("/var/log/dmesg", "r") as file:
+        log_path = get_dmesg_log_path()
        with open(log_path, 'r') as file:
            return file.read()
    else:
        logger.info("Unsupported platform.")
 def get_dmesg_log_path():
    """
    Check for the existence of a readable dmesg log file and return its path.
    Create an accessible path if not found.

    /var/log/dmesg is returned when it is readable. Otherwise the fallback
    /tmp/dmesg is returned; it is created if missing and, when a `dmesg`
    executable is found on PATH, a `dmesg --follow` process is started whose
    output is piped through `tee` into that file. The dmesg process handle is
    kept in the module-level `dmesg_proc`, so once it is set later calls
    return the fallback path without starting another pipeline.
    """
    global dmesg_proc

    # os.R_OK already fails for a missing file, so no separate F_OK test.
    if os.access('/var/log/dmesg', os.R_OK):
        return '/var/log/dmesg'

    dmesg_log_path = '/tmp/dmesg'
    if dmesg_proc:
        # The follow pipeline was started by an earlier call.
        return dmesg_log_path

    # Create the file directly instead of spawning `touch`, and only report
    # success when creation actually worked. Failure stays non-fatal, as it
    # was with the unchecked `touch` call.
    try:
        with open(dmesg_log_path, 'a'):
            pass
    except OSError as error:
        logger.info(f"Could not create {dmesg_log_path}: {error}")
    else:
        logger.info("Created /tmp/dmesg.")

    dmesg_path = shutil.which('dmesg')
    if dmesg_path:
        logger.info(f"Writing to {dmesg_log_path} from dmesg.")
        dmesg_proc = subprocess.Popen([dmesg_path, '--follow'], text=True, stdout=subprocess.PIPE)
        subprocess.Popen(['tee', dmesg_log_path], text=True, stdin=dmesg_proc.stdout, stdout=subprocess.DEVNULL)
    return dmesg_log_path
 def custom_filter(message):
    # Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern
    if "{TO_INTERPRETER{" in message and "}TO_INTERPRETER}" in message:
--- a/software/source/server/utils/process_utils.py
+++ b/software/source/server/utils/process_utils.py
@ -7,7 +7,11 @@ def kill_process_tree():
    pid = os.getpid()  # Get the current process ID
    try:
        # Send SIGTERM to the entire process group to ensure all processes are targeted
-        os.killpg(os.getpgid(pid), signal.SIGKILL)
+        try:
            os.killpg(os.getpgid(pid), signal.SIGKILL)
        # Windows implementation
        except AttributeError:
            os.kill(pid, signal.SIGTERM)
        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
        for child in children:
--- a/software/start.py
+++ b/software/start.py
@ -127,6 +127,10 @@ def _run(
        # llm_service = "llamafile"
        stt_service = "local-whisper"
        select_local_model()
    system_type = platform.system()
    if system_type == "Windows":
        server_host = "localhost"
    if not server_url:
        server_url = f"{server_host}:{server_port}"
@ -134,6 +138,8 @@ def _run(
    if not server and not client:
        server = True
        client = True
    def handle_exit(signum, frame):
        os._exit(0)