112 lines
3.8 KiB
Python
112 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
|
|
def detect_file_type(file_path):
    """Classify a file by its extension.

    The extension is compared case-insensitively. The result is one of
    'pdf', 'powerpoint', 'word', 'openoffice_presentation',
    'openoffice_document', or 'unknown' when the extension is not recognised.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == '.pdf':
        return 'pdf'
    if suffix in ('.pptx', '.ppt'):
        return 'powerpoint'
    if suffix in ('.docx', '.doc'):
        return 'word'
    if suffix == '.odp':
        return 'openoffice_presentation'
    if suffix == '.odt':
        return 'openoffice_document'
    return 'unknown'
|
|
|
|
|
|
def convert_to_pdf(input_file, output_dir, document_name):
    """Convert a supported document to PDF.

    PDF inputs are returned untouched. PowerPoint files are converted with
    the ``pptxtopdf`` package in a child interpreter, falling back to
    LibreOffice if that child exits with an error. Word and OpenDocument
    files are handed straight to LibreOffice.

    Args:
        input_file: Path of the source document.
        output_dir: Directory (str or Path) that receives the generated PDF.
        document_name: Base name; the PDF is written as
            ``<document_name>_temp.pdf`` inside ``output_dir``.

    Returns:
        ``input_file`` unchanged when it is already a PDF, the path of the
        generated PDF as a string on success, or None when the type is
        unsupported or the conversion failed or timed out.
    """
    import sys  # local import: only needed to locate the running interpreter

    file_type = detect_file_type(input_file)

    if file_type == 'pdf':
        print("✅ File is already PDF, no conversion needed")
        return input_file

    print(f"🔄 Converting {file_type} file to PDF...")

    # Build the target as a Path so the .exists() check below works and so
    # output_dir may be given as either a str or a Path.
    temp_pdf = Path(output_dir) / f"{document_name}_temp.pdf"

    try:
        if file_type == 'powerpoint':
            print(" Using pptxtopdf for PowerPoint conversion...")
            # The paths are passed as argv entries instead of being pasted
            # into the -c source, so quotes or backslashes in a file name
            # cannot break (or inject into) the child program.
            # NOTE(review): confirm pptxtopdf.convert accepts an output file
            # path as its second argument; the call is kept as the original
            # wrote it.
            result = subprocess.run(
                [
                    sys.executable or 'python', '-c',
                    'import sys, pptxtopdf; '
                    'pptxtopdf.convert(sys.argv[1], sys.argv[2])',
                    str(input_file), str(temp_pdf),
                ],
                capture_output=True, text=True, timeout=60,
            )

            if result.returncode != 0:
                print(f"⚠️ pptxtopdf failed: {result.stderr}")
                # Fallback: try using LibreOffice
                return convert_with_libreoffice(input_file, str(temp_pdf), file_type)

        elif file_type in ('word', 'openoffice_document', 'openoffice_presentation'):
            # Word and OpenDocument files are converted by LibreOffice.
            return convert_with_libreoffice(input_file, str(temp_pdf), file_type)

        else:
            print(f"❌ Unsupported file type: {file_type}")
            return None

        # Only the successful pptxtopdf path reaches this point.
        if temp_pdf.exists():
            print(f"✅ Successfully converted to PDF: {temp_pdf}")
            return str(temp_pdf)

        print("❌ Conversion failed - PDF file not created")
        return None

    except subprocess.TimeoutExpired:
        print("❌ Conversion timed out")
        return None
    except Exception as e:
        # Best-effort boundary: report any failure (e.g. interpreter not
        # launchable) and signal it to the caller with None.
        print(f"❌ Conversion error: {e}")
        return None
|
|
|
|
|
|
def convert_with_libreoffice(input_file, output_pdf, file_type):
    """Convert a document to PDF by running LibreOffice in headless mode.

    Args:
        input_file: Path (str or Path) of the document to convert.
        output_pdf: Path (str or Path) where the resulting PDF should end up.
        file_type: Label of the source type; used only in progress messages.

    Returns:
        The output path as a string on success, or None when ``soffice``
        could not be run, exited with an error, timed out, or did not
        produce the expected PDF.
    """
    try:
        print(f" Using LibreOffice for {file_type} conversion...")

        # Callers pass plain strings; normalise so .parent/.exists() work.
        output_pdf = Path(output_pdf)
        out_dir = output_pdf.parent

        # LibreOffice command
        cmd = [
            'soffice', '--headless', '--convert-to', 'pdf',
            '--outdir', str(out_dir),
            str(input_file),
        ]

        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)

        if result.returncode == 0:
            # LibreOffice creates PDF with same name as input
            produced_pdf = out_dir / f"{Path(input_file).stem}.pdf"

            if produced_pdf.exists():
                # Move to the expected name; replace() also overwrites a
                # stale file left by an earlier run, on every platform.
                produced_pdf.replace(output_pdf)
                print(f"✅ LibreOffice conversion successful: {output_pdf}")
                return str(output_pdf)

        print(f"⚠️ LibreOffice conversion failed: {result.stderr}")
        return None

    except subprocess.TimeoutExpired:
        print("❌ LibreOffice conversion timed out")
        return None
    except Exception as e:
        # Best-effort boundary: e.g. soffice is not installed or not on PATH.
        print(f"❌ LibreOffice conversion error: {e}")
        return None
|