#!/usr/bin/env python3
"""Convert presentation and word-processor files to PDF.

PowerPoint files are converted with the ``pptxtopdf`` package (run in a
child interpreter), falling back to LibreOffice when that fails.  Word and
OpenDocument files are converted with LibreOffice directly.
"""
import subprocess
import sys
from pathlib import Path

# Lower-case file extension -> logical file type.
_FILE_TYPES = {
    '.pdf': 'pdf',
    '.pptx': 'powerpoint',
    '.ppt': 'powerpoint',
    '.docx': 'word',
    '.doc': 'word',
    '.odp': 'openoffice_presentation',
    '.odt': 'openoffice_document',
}

# File types that are handed straight to LibreOffice.
_LIBREOFFICE_TYPES = frozenset(
    {'word', 'openoffice_document', 'openoffice_presentation'}
)

# Source executed in the child interpreter.  The paths are read from
# sys.argv rather than being formatted into the source text, so quotes or
# backslashes in a file name can neither break the snippet nor inject code.
_PPTXTOPDF_SNIPPET = (
    'import sys, pptxtopdf; pptxtopdf.convert(sys.argv[1], sys.argv[2])'
)


def detect_file_type(file_path):
    """Detect file type based on extension.

    Args:
        file_path: Path to the file, as a string or ``Path``.

    Returns:
        One of ``'pdf'``, ``'powerpoint'``, ``'word'``,
        ``'openoffice_presentation'``, ``'openoffice_document'``, or
        ``'unknown'`` when the extension is not recognised.  The match is
        case-insensitive.
    """
    file_ext = Path(file_path).suffix.lower()
    return _FILE_TYPES.get(file_ext, 'unknown')


def convert_to_pdf(input_file, output_dir, document_name):
    """Convert various file types to PDF.

    Args:
        input_file: Path of the file to convert (string or ``Path``).
        output_dir: Directory that receives the converted PDF (string or
            ``Path``).
        document_name: Base name for the result; the PDF is written as
            ``<document_name>_temp.pdf`` inside ``output_dir``.

    Returns:
        ``input_file`` unchanged when it is already a PDF, the path of the
        converted PDF as a string on success, or ``None`` when the type is
        unsupported or the conversion fails or times out.
    """
    file_type = detect_file_type(input_file)

    if file_type == 'pdf':
        print("✅ File is already PDF, no conversion needed")
        return input_file

    print(f"🔄 Converting {file_type} file to PDF...")

    # Keep the target as a Path: it is queried with .exists() below and its
    # .parent is used by the LibreOffice helper.
    temp_pdf = Path(output_dir) / f"{document_name}_temp.pdf"

    try:
        if file_type == 'powerpoint':
            print(" Using pptxtopdf for PowerPoint conversion...")
            # Run in the interpreter executing this script so the child sees
            # the same installed packages; fall back to whatever "python" is
            # on PATH if the interpreter path is unavailable.
            # NOTE(review): confirm that pptxtopdf.convert accepts an output
            # *file* path as its second argument.
            result = subprocess.run(
                [
                    sys.executable or 'python',
                    '-c',
                    _PPTXTOPDF_SNIPPET,
                    str(input_file),
                    str(temp_pdf),
                ],
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode != 0:
                print(f"⚠️ pptxtopdf failed: {result.stderr}")
                # Fallback: try using LibreOffice
                return convert_with_libreoffice(input_file, temp_pdf, file_type)
        elif file_type in _LIBREOFFICE_TYPES:
            return convert_with_libreoffice(input_file, temp_pdf, file_type)
        else:
            print(f"❌ Unsupported file type: {file_type}")
            return None

        # Only reached after pptxtopdf exited with status 0.
        if temp_pdf.exists():
            print(f"✅ Successfully converted to PDF: {temp_pdf}")
            return str(temp_pdf)

        print("❌ Conversion failed - PDF file not created")
        return None

    except subprocess.TimeoutExpired:
        print("❌ Conversion timed out")
        return None
    except Exception as e:
        # Callers rely on a None result rather than an exception.
        print(f"❌ Conversion error: {e}")
        return None


def convert_with_libreoffice(input_file, output_pdf, file_type):
    """Convert files using LibreOffice as fallback.

    Args:
        input_file: Path of the file to convert (string or ``Path``).
        output_pdf: Desired path of the resulting PDF (string or ``Path``).
        file_type: Logical file type, used only in progress messages.

    Returns:
        ``output_pdf`` as a string on success, or ``None`` when ``soffice``
        fails, times out, cannot be started, or does not produce the
        expected file.
    """
    output_pdf = Path(output_pdf)
    try:
        print(f" Using LibreOffice for {file_type} conversion...")

        cmd = [
            'soffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', str(output_pdf.parent),
            str(input_file),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)

        if result.returncode == 0:
            # LibreOffice creates PDF with same name as input
            libreoffice_pdf = output_pdf.parent / f"{Path(input_file).stem}.pdf"
            if libreoffice_pdf.exists():
                # Move to the expected temp name; replace() also overwrites a
                # stale file left by an earlier run, where rename() would
                # fail on Windows.
                libreoffice_pdf.replace(output_pdf)
                print(f"✅ LibreOffice conversion successful: {output_pdf}")
                return str(output_pdf)

        print(f"⚠️ LibreOffice conversion failed: {result.stderr}")
        return None

    except subprocess.TimeoutExpired:
        print("❌ LibreOffice conversion timed out")
        return None
    except Exception as e:
        # Includes FileNotFoundError when soffice is not installed.
        print(f"❌ LibreOffice conversion error: {e}")
        return None