#!/usr/bin/env python3
print("šŸš€ APP.PY STARTING - IMMEDIATE FEEDBACK", flush=True)

import sys
import os
import re
import time
from pathlib import Path

print("šŸ“¦ BASIC IMPORTS COMPLETE", flush=True)


def generate_toc(markdown_content):
    """Generate a Table of Contents from markdown headers"""
    print(" šŸ“‹ Generating Table of Contents...", flush=True)
    lines = markdown_content.split('\n')
    toc_lines = []
    toc_lines.append("## Table of Contents")
    toc_lines.append("")
    header_count = 0
    for line in lines:
        # Match headers (##, ###, etc.)
        header_match = re.match(r'^(#{2,})\s+(.+)$', line)
        if header_match:
            header_count += 1
            level = len(header_match.group(1)) - 2  # Convert ## to 0, ### to 1, etc.
            title = header_match.group(2)
            # Create anchor link
            anchor = re.sub(r'[^a-zA-Z0-9\s-]', '', title.lower())
            anchor = re.sub(r'\s+', '-', anchor.strip())
            # Add indentation based on header level (two spaces per level for nested markdown lists)
            indent = "  " * level
            toc_lines.append(f"{indent}- [{title}](#{anchor})")
    toc_lines.append("")
    toc_lines.append("---")
    toc_lines.append("")
    print(f" āœ… Generated TOC with {header_count} headers", flush=True)
    return '\n'.join(toc_lines)


def main():
    """Simple pitch deck analyzer with comprehensive debugging"""
    print("šŸš€ PITCH DECK ANALYZER MAIN FUNCTION STARTING", flush=True)
    print("=" * 50, flush=True)

    if len(sys.argv) < 2:
        print("āŒ Usage: python app.py <pdf_path>", flush=True)
        return

    pdf_path = sys.argv[1]
    if not os.path.exists(pdf_path):
        print(f"āŒ Error: File '{pdf_path}' not found", flush=True)
        return

    print(f"šŸ“ Processing file: {pdf_path}", flush=True)
    print(f"šŸ“ File exists: {os.path.exists(pdf_path)}", flush=True)
    print(f"šŸ“ File size: {os.path.getsize(pdf_path)} bytes", flush=True)

    # Import what we need directly (avoid __init__.py issues)
    print("\nšŸ“¦ IMPORTING MODULES", flush=True)
    print("-" * 30, flush=True)
    sys.path.append('modules')

    print(" šŸ”„ Importing client module...", flush=True)
    from client import get_openrouter_client
    print(" āœ… client module imported successfully", flush=True)

    print(" šŸ”„ Importing pdf_processor module...", flush=True)
    from pdf_processor import extract_slides_from_pdf
    print(" āœ… pdf_processor module imported successfully", flush=True)

    print(" šŸ”„ Importing analysis module...", flush=True)
    from analysis import analyze_slides_batch
    print(" āœ… analysis module imported successfully", flush=True)

    print(" šŸ”„ Importing markdown_utils module...", flush=True)
    from markdown_utils import send_to_api_and_get_haste_link
    print(" āœ… markdown_utils module imported successfully", flush=True)

    print("āœ… ALL MODULES IMPORTED SUCCESSFULLY", flush=True)

    # Extract slides
    print("\nšŸ“„ EXTRACTING SLIDES", flush=True)
    print("-" * 30, flush=True)
    print(" šŸ”„ Calling extract_slides_from_pdf...", flush=True)
    start_time = time.time()
    slides = extract_slides_from_pdf(pdf_path, "processed", Path(pdf_path).stem)
    extraction_time = time.time() - start_time
    print(f" āœ… extract_slides_from_pdf completed in {extraction_time:.2f}s", flush=True)
    print(f" šŸ“Š Extracted {len(slides)} slides", flush=True)

    # LIMIT TO FIRST 3 SLIDES FOR TESTING
    print(" šŸ”„ Limiting to first 3 slides for testing...", flush=True)
    slides = slides[:3]
    print(f" šŸ“Š Processing {len(slides)} slides", flush=True)

    # Analyze slides
    print("\n🧠 ANALYZING SLIDES", flush=True)
    print("-" * 30, flush=True)
    print(" šŸ”„ Initializing API client...", flush=True)
    client = get_openrouter_client()
    print(" āœ… API client initialized successfully", flush=True)

    print(" šŸ”„ Calling analyze_slides_batch...", flush=True)
    analysis_start_time = time.time()
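    # NOTE: the report-building code below assumes analyze_slides_batch returns a dict
    # keyed by 1-based slide number, where each value maps an agent key to a dict with
    # 'agent' (display name) and 'analysis' (markdown text) entries.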
    analysis_results = analyze_slides_batch(client, slides)
    analysis_time = time.time() - analysis_start_time
    print(f" āœ… analyze_slides_batch completed in {analysis_time:.2f}s", flush=True)
    print(f" šŸ“Š Analysis results: {len(analysis_results)} slides analyzed", flush=True)

    # Create report
    print("\nšŸ“ CREATING REPORT", flush=True)
    print("-" * 30, flush=True)
    print(" šŸ”„ Building markdown content...", flush=True)
    markdown_content = f"# Pitch Deck Analysis: {Path(pdf_path).stem}\n\n"

    # Add analysis metadata
    markdown_content += "This analysis was generated using multiple AI agents, each specialized in different aspects of slide evaluation.\n\n"
    markdown_content += f"**Source File:** `{Path(pdf_path).name}` (PDF)\n"
    markdown_content += f"**Analysis Generated:** {len(slides)} slides processed (limited for testing)\n"
    markdown_content += "**Processing Method:** Individual processing with specialized AI agents\n"
    markdown_content += "**Text Extraction:** Docling-powered text transcription\n\n"

    # Add executive summary at the top (model-assisted with heuristic fallback)
    print(" šŸ”„ Generating executive summary...", flush=True)

    def _build_heuristic_summary(analysis_results_local):
        categories = [
            ('problem_analyzer', 'Problem Analysis'),
            ('solution_evaluator', 'Solution Evaluation'),
            ('market_opportunity_assessor', 'Market Opportunity'),
            ('traction_evaluator', 'Traction'),
            ('funding_analyzer', 'Funding & Ask')
        ]
        lines = []
        lines.append("## Executive Summary\n")

        # Overall one-liner assembled from first sentences
        overall_bits = []
        for slide_num in sorted(analysis_results_local.keys()):
            slide_agents = analysis_results_local.get(slide_num, {})
            pa = slide_agents.get('problem_analyzer', {}).get('analysis', '')
            if pa:
                first_sentence = pa.split('. ')[0].strip()
                if first_sentence:
                    overall_bits.append(first_sentence)
            if len(overall_bits) >= 3:
                break
        if overall_bits:
            lines.append(" ".join(overall_bits) + "\n")

        # Coverage table-like bullets (default to "Not covered" until an agent analysis is found)
        lines.append("### Coverage of Points of Interest\n")
        for key, title in categories:
            coverage_note = "Not covered"
            for slide_agents in analysis_results_local.values():
                if key in slide_agents and slide_agents[key].get('analysis'):
                    coverage_note = "Covered"
                    break
            lines.append(f"- {title}: {coverage_note}")

        lines.append("\n### Slide Snapshots\n")
        for slide_num in sorted(analysis_results_local.keys()):
            slide_agents = analysis_results_local.get(slide_num, {})
            pa = slide_agents.get('problem_analyzer', {}).get('analysis', '')
            one_liner = (pa.split('\n')[0].split('. ')[0]).strip() if pa else "No clear problem statement identified."
            lines.append(f"- Slide {slide_num}: {one_liner}")
        lines.append("\n")
        return "\n".join(lines)

    def _build_model_summary(client_local, analysis_results_local):
        try:
            # Aggregate content for model
            blocks = []
            for slide_num in sorted(analysis_results_local.keys()):
                slide_agents = analysis_results_local[slide_num]
                parts = []
                for k, v in slide_agents.items():
                    agent_name = v.get('agent', k)
                    analysis_text = v.get('analysis', '')
                    parts.append(f"{agent_name}: {analysis_text}")
                blocks.append(f"Slide {slide_num}:\n" + "\n".join(parts))
            aggregate_text = "\n\n".join(blocks)

            messages = [
                {"role": "system", "content": "You are a senior pitch deck analyst. Create a concise executive summary."},
                {"role": "user", "content": [
                    {"type": "text", "text": "Summarize this deck. Provide: 1) 2-3 sentence overall summary of what the deck accomplishes; 2) A bullet list rating coverage of these points of interest: Problem, Solution, Market Opportunity, Traction, Funding & Ask (ratings: Strong/Covered/Weak/Not covered) with one short note each; 3) A one-line snapshot per slide. Return Markdown only."},
                    {"type": "text", "text": aggregate_text}
                ]}
            ]
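            # Assumption: get_openrouter_client returns an OpenAI-compatible client object,
            # so the standard chat.completions.create call signature is used here.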
            response = client_local.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                max_tokens=700
            )
            content = response.choices[0].message.content.strip()
            if content:
                return content + "\n\n"
        except Exception as _e:
            print(f" āš ļø Model summary generation failed: {_e}", flush=True)
        return None

    summary_md = _build_model_summary(client, analysis_results) or _build_heuristic_summary(analysis_results)
    markdown_content += summary_md

    print(f" šŸ“Š Building markdown for {len(slides)} slides...", flush=True)
    for i, slide_data in enumerate(slides):
        slide_num = i + 1
        print(f" šŸ”„ Processing slide {slide_num}/{len(slides)}...", flush=True)
        analysis = analysis_results.get(slide_num, {})

        markdown_content += f"# Slide {slide_num}\n\n"
        markdown_content += f"![Slide {slide_num}](slides/{slide_data['filename']})\n\n"

        if analysis:
            markdown_content += "## Agentic Analysis\n\n"
            # Format each agent's analysis
            agent_count = 0
            for agent_key, agent_data in analysis.items():
                if isinstance(agent_data, dict) and 'agent' in agent_data and 'analysis' in agent_data:
                    agent_count += 1
                    agent_name = agent_data['agent']
                    agent_analysis = agent_data['analysis']
                    markdown_content += f"### {agent_name}\n\n"
                    markdown_content += f"{agent_analysis}\n\n"
            print(f" āœ… Added {agent_count} agent analyses for slide {slide_num}", flush=True)
        else:
            markdown_content += "## Agentic Analysis\n\n"
            markdown_content += "No analysis available\n\n"
            print(f" āš ļø No analysis available for slide {slide_num}", flush=True)

        markdown_content += "---\n\n"

    print(" āœ… Markdown content built successfully", flush=True)

    # Generate Table of Contents
    print(" šŸ”„ Generating Table of Contents...", flush=True)
    toc = generate_toc(markdown_content)

    # Insert TOC after the main title
    print(" šŸ”„ Inserting TOC into document...", flush=True)
    lines = markdown_content.split('\n')
    final_content = []
    final_content.append(lines[0])   # Main title
    final_content.append("")         # Empty line
    final_content.append(toc)        # TOC
    final_content.extend(lines[2:])  # Rest of content
    final_markdown = '\n'.join(final_content)
    print(f" āœ… Final markdown created: {len(final_markdown)} characters", flush=True)

    # Save report
    print("\nšŸ’¾ SAVING REPORT", flush=True)
    print("-" * 30, flush=True)
    output_file = f"processed/{Path(pdf_path).stem}_analysis.md"
    print(f" šŸ”„ Saving to: {output_file}", flush=True)
    os.makedirs("processed", exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(final_markdown)
    print(f" āœ… Report saved successfully ({len(final_markdown)} characters)", flush=True)

    # Always upload the report
    print("\n🌐 UPLOADING REPORT", flush=True)
    print("-" * 30, flush=True)
    print(" šŸ”„ Calling send_to_api_and_get_haste_link...", flush=True)
    # send_to_api_and_get_haste_link may return a (raw_url, html_url) pair or a single URL string
    upload_result = send_to_api_and_get_haste_link(final_markdown, Path(pdf_path).stem)
    if isinstance(upload_result, (tuple, list)) and upload_result:
        raw_url = upload_result[0]
        html_url = upload_result[1] if len(upload_result) >= 2 else None
        if raw_url:
            print(f" āœ… Raw markdown URL: {raw_url}", flush=True)
        if html_url:
            print(f" āœ… HTML URL: {html_url}", flush=True)
        if not raw_url and not html_url:
            print(" āŒ Upload failed - no URLs returned", flush=True)
    elif upload_result:
        print(f" āœ… Report uploaded successfully: {upload_result}", flush=True)
    else:
        print(" āŒ Upload failed - no URL returned", flush=True)

    print("\nšŸŽ‰ PROCESSING COMPLETE!", flush=True)
    print("=" * 50, flush=True)


if __name__ == "__main__":
    print("šŸŽÆ __main__ BLOCK ENTERED", flush=True)
    main()