From 874e0a69ae07266d1ffca630e8ef0d657f84f12e Mon Sep 17 00:00:00 2001
From: colin
Date: Wed, 27 Aug 2025 18:59:45 -0400
Subject: [PATCH] feat: add marketline crawler container and services; CI build; README usage

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message or of the change itself):

- This copy of the patch had its line breaks and indentation collapsed.
  Line structure was rebuilt from the hunk line counts; leading whitespace
  inside YAML/Dockerfile lines is inferred. If context fails to match the
  tree, retry with `git apply --ignore-whitespace` and confirm the YAML
  nesting by eye before committing.
- docker/crawler/Dockerfile has no USER instruction, so the crawler runs
  with the base image's default user (root for python:3.11-slim).
  TODO(review): consider a dedicated non-root user once it is confirmed
  where run_all.sh writes its output.
- docker/crawler/Dockerfile ends with two blank lines; git flags these as
  blank-at-eof whitespace when the patch is applied.

 .woodpecker.yml           |  5 +++++
 README.md                 |  8 ++++++++
 docker-compose.dev.yml    | 14 ++++++++++++++
 docker-compose.yml        | 12 ++++++++++++
 docker/crawler/Dockerfile | 20 ++++++++++++++++++++
 5 files changed, 59 insertions(+)
 create mode 100644 docker/crawler/Dockerfile

diff --git a/.woodpecker.yml b/.woodpecker.yml
index 043f6f6..13ad056 100644
--- a/.woodpecker.yml
+++ b/.woodpecker.yml
@@ -9,6 +9,11 @@ pipeline:
     commands:
       - docker build -t ploughshares-crawler-google-alerts:ci docker/crawler-google-alerts
 
+  build_crawler_marketline:
+    image: docker:24
+    commands:
+      - docker build -t ploughshares-crawler-marketline:ci docker/crawler
+
 # build:0
 labels:
   location: manager
diff --git a/README.md b/README.md
index 109ad12..824d779 100644
--- a/README.md
+++ b/README.md
@@ -163,6 +163,14 @@ To run the Google Alerts crawler locally (requires GOOGLE_API_KEY):
 GOOGLE_API_KEY=your_key docker-compose -f docker-compose.dev.yml run --rm crawler_google_alerts
 ```
 
+### Crawler - Marketline
+
+Run the marketline crawler pipeline (scrape -> analyze -> push):
+
+```bash
+GOOGLE_API_KEY=your_key docker-compose -f docker-compose.dev.yml run --rm crawler_marketline
+```
+
 ## Features
 
 - Transaction management (create, view, edit)
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index bfdc96e..0d4c322 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -62,6 +62,20 @@ services:
       - ./docker/crawler-google-alerts:/app
     restart: unless-stopped
 
+  crawler_marketline:
+    build:
+      context: ./docker/crawler
+    image: ploughshares-crawler-marketline:dev
+    environment:
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY}
+    depends_on:
+      db:
+        condition: service_started
+    command: bash run_all.sh
+    volumes:
+      - ./docker/crawler:/app
+    restart: unless-stopped
+
 volumes:
   postgres_dev_data:
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 65a42e6..02c1aa1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -52,6 +52,18 @@ services:
     command: python main.py
     restart: unless-stopped
 
+  crawler_marketline:
+    build:
+      context: ./docker/crawler
+    image: ploughshares-crawler-marketline:latest
+    environment:
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY}
+    depends_on:
+      db:
+        condition: service_started
+    command: bash run_all.sh
+    restart: unless-stopped
+
 volumes:
   postgres_data:
 
diff --git a/docker/crawler/Dockerfile b/docker/crawler/Dockerfile
new file mode 100644
index 0000000..0e8f3e1
--- /dev/null
+++ b/docker/crawler/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+COPY . /app
+
+# GOOGLE_API_KEY must be provided at runtime
+CMD ["bash", "run_all.sh"]
+
+