#!/usr/bin/env python3
"""
Market Cap Validator - Main Interface

This module provides a simple interface to validate market cap claims
from pitch deck slides using RAG search capabilities.
"""

import os
import json
from typing import List, Dict, Any, Optional

from .rag_agent import MarketCapRAGAgent
from .validation_report import ValidationReportGenerator


class MarketCapValidator:
    """
    Main interface for market cap validation using RAG search.

    Wraps a ``MarketCapRAGAgent`` (claim extraction and validation) and a
    ``ValidationReportGenerator`` (report creation and persistence).
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the market cap validator.

        Args:
            api_key: OpenRouter API key (if not provided, will use
                environment variable)
        """
        self.rag_agent = MarketCapRAGAgent(api_key)
        self.report_generator = ValidationReportGenerator()

    def validate_from_slides(self, slide_texts: List[Dict[str, Any]],
                             save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from slide text exports.

        Args:
            slide_texts: List of slide data with 'slide_number' and 'text' keys
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary with the keys 'validation_results', 'report',
            'report_filename' (None when the report was not saved) and
            'summary'.
        """
        print("šŸ” Starting market cap validation process...")

        # Extract and validate claims
        validation_results = self.rag_agent.validate_all_claims(slide_texts)

        # Generate report
        report = self.report_generator.generate_report(validation_results, slide_texts)

        # Save report if requested
        report_filename = None
        if save_report:
            report_filename = self.report_generator.save_report(report)
            print(f"šŸ“„ Validation report saved to: {report_filename}")

        # Prepare summary
        summary = self._generate_summary(validation_results)

        return {
            'validation_results': validation_results,
            'report': report,
            'report_filename': report_filename,
            'summary': summary
        }

    def validate_from_file(self, file_path: str,
                           save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from a JSON file containing slide texts.

        The file may hold a list of slides, a dict with a 'slides' list, or a
        single slide dict with a 'text' key.

        Args:
            file_path: Path to JSON file with slide data
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file is not valid JSON or holds no
                recognisable slide data.
        """
        # Only the load is guarded: errors raised later by the validation
        # run must not be misreported as problems with the input file.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {file_path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON file: {e}") from e

        if isinstance(data, list):
            # A plain list is passed through untouched (even when empty).
            slide_texts = data
        else:
            slide_texts = self._extract_slides(data)
            if not slide_texts:
                raise ValueError(f"No valid slide data found in {file_path}")

        print(f"šŸ“ Loaded slide data from: {file_path}")
        return self.validate_from_slides(slide_texts, save_report)

    def validate_from_processed_folder(self, folder_path: str = "processed",
                                       save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from processed slide files.

        Every ``*.json`` file directly inside ``folder_path`` is read in
        sorted filename order; files that cannot be read or parsed are
        reported and skipped.

        Args:
            folder_path: Path to folder containing processed slide files
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            ValueError: If the folder is missing or yields no slide data.
        """
        slide_texts: List[Dict[str, Any]] = []

        # Look for JSON files in the processed folder
        if os.path.isdir(folder_path):
            # os.listdir returns entries in arbitrary order; sort so the
            # slide order (and therefore the report) is reproducible.
            for filename in sorted(os.listdir(folder_path)):
                if not filename.endswith('.json'):
                    continue

                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                    slide_texts.extend(self._extract_slides(data))
                except (json.JSONDecodeError, UnicodeDecodeError,
                        OSError, KeyError) as e:
                    # Best effort: one bad file must not abort the folder.
                    print(f"āš ļø Skipping invalid file {filename}: {e}")
                    continue

        if not slide_texts:
            raise ValueError(f"No valid slide data found in {folder_path}")

        print(f"šŸ“ Loaded {len(slide_texts)} slides from processed folder")
        return self.validate_from_slides(slide_texts, save_report)

    @staticmethod
    def _extract_slides(data: Any) -> List[Any]:
        """
        Normalise a decoded JSON payload into a list of slide entries.

        Recognised shapes: a list of slides, a dict whose 'slides' value is
        a list, or a single slide dict carrying a 'text' key. Anything else
        yields an empty list.
        """
        if isinstance(data, list):
            return list(data)
        if isinstance(data, dict):
            if 'slides' in data:
                slides = data['slides']
                # A non-list 'slides' value is not usable slide data.
                return list(slides) if isinstance(slides, list) else []
            if 'text' in data:
                return [data]
        return []

    def _generate_summary(self, validation_results: List) -> Dict[str, Any]:
        """
        Generate a summary of validation results.

        Each result is expected to expose ``is_accurate`` and
        ``claim.slide_number`` attributes, as read below.
        """
        total_claims = len(validation_results)
        accurate_claims = sum(1 for r in validation_results if r.is_accurate)
        inaccurate_claims = total_claims - accurate_claims

        return {
            'total_claims': total_claims,
            'accurate_claims': accurate_claims,
            'inaccurate_claims': inaccurate_claims,
            # Guard against division by zero when no claims were found.
            'accuracy_rate': (accurate_claims / total_claims * 100) if total_claims > 0 else 0,
            'claims_by_slide': self._group_claims_by_slide(validation_results)
        }

    def _group_claims_by_slide(self, validation_results: List) -> Dict[int, List]:
        """Group results by ``result.claim.slide_number``, keeping input order."""
        claims_by_slide: Dict[int, List] = {}
        for result in validation_results:
            claims_by_slide.setdefault(result.claim.slide_number, []).append(result)
        return claims_by_slide


def validate_market_caps(slide_texts: List[Dict[str, Any]],
                         api_key: Optional[str] = None,
                         save_report: bool = True) -> Dict[str, Any]:
    """
    Convenience function to validate market cap claims.

    Args:
        slide_texts: List of slide data with 'slide_number' and 'text' keys
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    validator = MarketCapValidator(api_key)
    return validator.validate_from_slides(slide_texts, save_report)


def validate_market_caps_from_file(file_path: str,
                                   api_key: Optional[str] = None,
                                   save_report: bool = True) -> Dict[str, Any]:
    """
    Convenience function to validate market cap claims from a file.

    Args:
        file_path: Path to JSON file with slide data
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    validator = MarketCapValidator(api_key)
    return validator.validate_from_file(file_path, save_report)


def validate_market_caps_from_processed(folder_path: str = "processed",
                                        api_key: Optional[str] = None,
                                        save_report: bool = True) -> Dict[str, Any]:
    """
    Convenience function to validate market cap claims from processed folder.

    Args:
        folder_path: Path to folder containing processed slide files
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    validator = MarketCapValidator(api_key)
    return validator.validate_from_processed_folder(folder_path, save_report)


def main() -> None:
    """Run the example: validate the default 'processed' folder and print a summary."""
    print("Market Cap Validator - RAG Agent")
    print("=================================")

    # Try to validate from processed folder
    try:
        results = validate_market_caps_from_processed()
        summary = results['summary']

        print("\nāœ… Validation Complete!")
        print("šŸ“Š Summary:")
        print(f" - Total Claims: {summary['total_claims']}")
        print(f" - Accurate: {summary['accurate_claims']}")
        print(f" - Inaccurate: {summary['inaccurate_claims']}")
        print(f" - Accuracy Rate: {summary['accuracy_rate']:.1f}%")

        if results['report_filename']:
            print(f"šŸ“„ Report saved to: {results['report_filename']}")

    except Exception as e:
        # Top-level script boundary: report the failure and show usage help.
        print(f"āŒ Error: {e}")
        print("\nUsage examples:")
        print("1. Place slide data JSON files in 'processed/' folder")
        print("2. Run: python -m modules.market_cap_validator")
        print("3. Or use the functions directly in your code")


if __name__ == "__main__":
    # Example usage
    main()