#!/usr/bin/env python3
"""Render the pages of a PDF to PNG slide images using PyMuPDF."""
# Import-trace print: emitted when the module starts loading, before the
# (comparatively slow) third-party import below.
print('🟢 PDF_PROCESSOR.PY: Starting import...')

import base64
from pathlib import Path

import fitz  # PyMuPDF for PDF processing


def extract_slides_from_pdf(pdf_path, output_dir, document_name):
    """Extract individual slides from PDF as images.

    Each page is rendered at 2x zoom, written as a PNG to
    ``./processed/<document_name>/slides/`` and also returned base64-encoded.

    Args:
        pdf_path: Path of the PDF file to open.
        output_dir: Currently unused; output always goes under the relative
            ``processed`` directory. Kept so existing callers keep working.
        document_name: Name used for the output sub-directory and as the
            prefix of every slide filename.

    Returns:
        A list with one dict per page, in page order, with the keys
        ``page_num`` (1-based), ``filename``, ``path`` (``Path`` of the PNG),
        ``base64`` (PNG bytes as a base64 ``str``), ``document_name`` and
        ``processed_dir``. An empty list is returned if anything fails;
        the error is printed rather than raised.
    """
    print(f"Extracting slides from PDF: {pdf_path}")

    # Create processed directory structure: ./processed/DocumentName/
    processed_dir = Path("processed") / document_name
    processed_dir.mkdir(parents=True, exist_ok=True)

    # Create slides directory within processed directory
    slides_dir = processed_dir / "slides"
    slides_dir.mkdir(exist_ok=True)

    slides = []
    # 2x zoom for better quality; identical for every page, so build it once.
    zoom = fitz.Matrix(2.0, 2.0)

    try:
        # The context manager closes the document even when rendering or
        # saving a page raises, so the handle is not leaked on the error path.
        with fitz.open(pdf_path) as pdf_document:
            for page_num, page in enumerate(pdf_document, start=1):
                pix = page.get_pixmap(matrix=zoom)

                # Encode to PNG once and reuse the same bytes for the file on
                # disk and for the base64 payload.
                img_data = pix.tobytes("png")

                # Save as PNG with document name prefix
                slide_filename = f"{document_name}_slide_{page_num:03d}.png"
                slide_path = slides_dir / slide_filename
                slide_path.write_bytes(img_data)

                slides.append({
                    'page_num': page_num,
                    'filename': slide_filename,
                    'path': slide_path,
                    'base64': base64.b64encode(img_data).decode('utf-8'),
                    'document_name': document_name,
                    'processed_dir': processed_dir,
                })

                print(f" Extracted slide {page_num}")

        print(f"✅ Extracted {len(slides)} slides")
        return slides

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and hand back an
        # empty result instead of propagating (partial results are discarded).
        print(f"❌ Error extracting slides: {e}")
        return []


print('🟢 PDF_PROCESSOR.PY: Import complete!')