153 lines
5.5 KiB
Python
153 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
|
|
def generate_toc(markdown_content):
    """Generate a GitHub-style Table of Contents from markdown headers.

    Scans *markdown_content* for headers at level 2 and deeper (``##``,
    ``###``, ...; the single-``#`` document title is deliberately excluded)
    and builds a nested bullet list of anchor links. Repeated header titles
    are disambiguated with ``-1``, ``-2``, ... suffixes, matching GitHub's
    slug behaviour, so each TOC entry links to its own header instead of
    every duplicate pointing at the first occurrence.

    Args:
        markdown_content: The full markdown document as a single string.

    Returns:
        The TOC as a markdown string, terminated by a ``---`` separator.
    """
    print(" 📋 Generating Table of Contents...")
    toc_lines = ["## Table of Contents", ""]

    header_count = 0
    anchor_counts = {}  # base slug -> number of times seen so far

    for line in markdown_content.split('\n'):
        # Match headers (##, ###, etc.)
        header_match = re.match(r'^(#{2,})\s+(.+)$', line)
        if not header_match:
            continue

        header_count += 1
        level = len(header_match.group(1)) - 2  # Convert ## to 0, ### to 1, etc.
        title = header_match.group(2)

        # Create anchor link (GitHub slug: lowercase, drop punctuation,
        # collapse whitespace to dashes).
        anchor = re.sub(r'[^a-zA-Z0-9\s-]', '', title.lower())
        anchor = re.sub(r'\s+', '-', anchor.strip())

        # Disambiguate duplicate titles the way GitHub does: first
        # occurrence keeps the plain slug, later ones get -1, -2, ...
        seen = anchor_counts.get(anchor, 0)
        anchor_counts[anchor] = seen + 1
        if seen:
            anchor = f"{anchor}-{seen}"

        # Indent two spaces per nesting level below ##.
        indent = "  " * level
        toc_lines.append(f"{indent}- [{title}](#{anchor})")

    toc_lines.extend(["", "---", ""])

    print(f" ✅ Generated TOC with {header_count} headers")
    return '\n'.join(toc_lines)
|
|
|
|
def main():
    """Simple pitch deck analyzer.

    Pipeline: read a PDF path from ``sys.argv[1]``, extract slides, run the
    AI agent analysis, assemble a markdown report with a table of contents,
    save it under ``processed/<stem>_analysis.md``, and upload it.

    Usage: ``python app.py <pdf_file>``. Prints an error and returns early
    when the argument is missing or the file does not exist.
    """
    if len(sys.argv) < 2:
        print("Usage: python app.py <pdf_file>")
        return

    pdf_path = sys.argv[1]
    if not os.path.exists(pdf_path):
        print(f"Error: File '{pdf_path}' not found")
        return

    print(f"🚀 Processing: {pdf_path}")

    # Import what we need directly (avoid __init__.py issues). These are
    # project-local modules resolved via the 'modules' directory.
    print("📦 Importing modules...")
    sys.path.append('modules')
    from client import get_openrouter_client
    from pdf_processor import extract_slides_from_pdf
    from analysis import analyze_slides_batch
    from markdown_utils import send_to_api_and_get_haste_link
    print("✅ Modules imported successfully")

    # Hoist the stem — it is needed for the title, the output path, and the
    # upload name.
    stem = Path(pdf_path).stem

    # Extract slides
    print("📄 Extracting slides...")
    slides = extract_slides_from_pdf(pdf_path, "processed", stem)
    print(f"✅ Extracted {len(slides)} slides")

    # Analyze slides
    print("🧠 Analyzing slides...")
    client = get_openrouter_client()
    print("🔗 API client initialized")

    analysis_results = analyze_slides_batch(client, slides)
    print("✅ Analysis complete")

    # Create report. Accumulate fragments in a list and join once at the
    # end — repeated `str +=` in a loop is quadratic in the worst case.
    print("📝 Creating report...")
    parts = [f"# Pitch Deck Analysis: {stem}\n\n"]

    # Add analysis metadata
    parts.append("This analysis was generated using multiple AI agents, each specialized in different aspects of slide evaluation.\n\n")
    parts.append(f"**Source File:** `{Path(pdf_path).name}` (PDF)\n")
    parts.append(f"**Analysis Generated:** {len(slides)} slides processed\n")
    parts.append("**Processing Method:** Individual processing with specialized AI agents\n")
    parts.append("**Text Extraction:** Docling-powered text transcription\n\n")

    print(f"📊 Building markdown for {len(slides)} slides...")
    for i, slide_data in enumerate(slides):
        slide_num = i + 1
        # analysis_results is keyed by 1-based slide number; missing slides
        # fall back to an empty dict and render the "no analysis" branch.
        analysis = analysis_results.get(slide_num, {})

        print(f" 📄 Processing slide {slide_num}...")

        parts.append(f"# Slide {slide_num}\n\n")
        # NOTE(review): the original appended a placeholder-less f-string
        # here — looks like an image/content embed was lost; preserving the
        # original output (a blank paragraph). TODO confirm intent.
        parts.append("\n\n")

        if analysis:
            parts.append("## Agentic Analysis\n\n")

            # Format each agent's analysis; skip entries that do not carry
            # both an 'agent' name and an 'analysis' body.
            agent_count = 0
            for agent_key, agent_data in analysis.items():
                if isinstance(agent_data, dict) and 'agent' in agent_data and 'analysis' in agent_data:
                    agent_count += 1
                    parts.append(f"### {agent_data['agent']}\n\n")
                    parts.append(f"{agent_data['analysis']}\n\n")

            print(f" ✅ Added {agent_count} agent analyses")
        else:
            parts.append("## Agentic Analysis\n\n")
            parts.append("No analysis available\n\n")
            print(f" ⚠️ No analysis available for slide {slide_num}")

        parts.append("---\n\n")

    markdown_content = "".join(parts)

    # Generate Table of Contents
    print("📋 Generating Table of Contents...")
    toc = generate_toc(markdown_content)

    # Insert TOC after the main title (line 0 is the "# ..." title, line 1
    # is the blank line that follows it — replaced by the inserted block).
    print("🔗 Inserting TOC into document...")
    doc_lines = markdown_content.split('\n')
    final_markdown = '\n'.join([doc_lines[0], "", toc] + doc_lines[2:])

    # Save report
    output_file = f"processed/{stem}_analysis.md"
    print(f"💾 Saving report to: {output_file}")
    os.makedirs("processed", exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(final_markdown)

    print(f"✅ Report saved successfully ({len(final_markdown)} characters)")

    # Always upload the report
    print("🌐 Uploading report...")
    haste_url = send_to_api_and_get_haste_link(final_markdown, stem)
    if haste_url:
        print(f"✅ Report uploaded to: {haste_url}")
    else:
        print("❌ Upload failed")
|
|
# Script entry point: run the analyzer only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|