# technical-screen-2025-10-22/modules/market_cap_validator.py

#!/usr/bin/env python3

"""
Market Cap Validator - Main Interface

This module provides a simple interface to validate market cap claims
from pitch deck slides using RAG search capabilities.
"""

import os
import json
from typing import List, Dict, Any, Optional
from .rag_agent import MarketCapRAGAgent
from .validation_report import ValidationReportGenerator


class MarketCapValidator:
    """
    Main interface for market cap validation using RAG search.

    Delegates claim validation to a ``MarketCapRAGAgent`` and report
    building/saving to a ``ValidationReportGenerator``. The three public
    ``validate_from_*`` methods differ only in where the slide data comes
    from: an in-memory list, a single JSON file, or a folder of JSON files.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the market cap validator

        Args:
            api_key: OpenRouter API key (if not provided, will use environment variable)
        """
        self.rag_agent = MarketCapRAGAgent(api_key)
        self.report_generator = ValidationReportGenerator()

    def validate_from_slides(self, slide_texts: List[Dict[str, Any]],
                           save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from slide text exports

        Args:
            slide_texts: List of slide data with 'slide_number' and 'text' keys
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary with the keys 'validation_results', 'report',
            'report_filename' (None when the report was not saved) and
            'summary' (see ``_generate_summary``).
        """
        print("🔍 Starting market cap validation process...")

        # Extract and validate claims
        validation_results = self.rag_agent.validate_all_claims(slide_texts)

        # Generate report
        report = self.report_generator.generate_report(validation_results, slide_texts)

        # Save report if requested
        report_filename = None
        if save_report:
            report_filename = self.report_generator.save_report(report)
            print(f"📄 Validation report saved to: {report_filename}")

        return {
            'validation_results': validation_results,
            'report': report,
            'report_filename': report_filename,
            'summary': self._generate_summary(validation_results),
        }

    def validate_from_file(self, file_path: str, save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from a JSON file containing slide texts

        Args:
            file_path: Path to JSON file with slide data
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file does not contain valid JSON.
        """
        # Keep the try body limited to reading/parsing the input file, so that
        # errors raised later by the validation or report-saving steps are not
        # misreported as problems with ``file_path``.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                slide_texts = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {file_path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON file: {e}") from e

        print(f"📁 Loaded slide data from: {file_path}")
        return self.validate_from_slides(slide_texts, save_report)

    def validate_from_processed_folder(self, folder_path: str = "processed",
                                    save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from processed slide files

        Every ``*.json`` entry directly inside ``folder_path`` is read, in
        sorted filename order, and its slides are concatenated. Files that
        cannot be read or parsed are reported and skipped.

        Args:
            folder_path: Path to folder containing processed slide files
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            ValueError: If no slide data could be loaded (folder missing,
                not a directory, empty, or containing no usable JSON).
        """
        slide_texts: List[Any] = []

        if os.path.isdir(folder_path):
            # os.listdir() returns entries in arbitrary order; sort so slides
            # are loaded in the same order on every run and platform.
            # (Plain string sort: "slide_10.json" precedes "slide_2.json".)
            for filename in sorted(os.listdir(folder_path)):
                if not filename.endswith('.json'):
                    continue

                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
                    # Best-effort scan: one bad file must not abort the run.
                    print(f"⚠️ Skipping invalid file {filename}: {e}")
                    continue

                slide_texts.extend(self._slides_from_json(data))

        if not slide_texts:
            raise ValueError(f"No valid slide data found in {folder_path}")

        print(f"📁 Loaded {len(slide_texts)} slides from processed folder")
        return self.validate_from_slides(slide_texts, save_report)

    @staticmethod
    def _slides_from_json(data: Any) -> List[Any]:
        """
        Extract the slide entries from one parsed JSON document.

        Recognized structures:
            * a list                      -> the list itself
            * a dict with a list 'slides' -> that list
            * a dict with a 'text' key    -> the dict as a single slide

        Anything else yields an empty list.
        """
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            slides = data.get('slides')
            # Only accept a real list: extending with a str or dict would
            # silently splat characters/keys into the slide list.
            if isinstance(slides, list):
                return slides
            if 'text' in data:
                return [data]
        return []

    def _generate_summary(self, validation_results: List) -> Dict[str, Any]:
        """
        Generate a summary of validation results

        Args:
            validation_results: Result objects exposing ``is_accurate`` and
                ``claim.slide_number`` attributes.

        Returns:
            Dictionary with 'total_claims', 'accurate_claims',
            'inaccurate_claims', 'accuracy_rate' (percentage, 0.0 when there
            are no claims) and 'claims_by_slide'.
        """
        total_claims = len(validation_results)
        accurate_claims = sum(1 for r in validation_results if r.is_accurate)

        return {
            'total_claims': total_claims,
            'accurate_claims': accurate_claims,
            'inaccurate_claims': total_claims - accurate_claims,
            # Guard the division so an empty result list yields 0.0, not an error
            'accuracy_rate': (accurate_claims / total_claims * 100) if total_claims > 0 else 0.0,
            'claims_by_slide': self._group_claims_by_slide(validation_results)
        }

    def _group_claims_by_slide(self, validation_results: List) -> Dict[int, List]:
        """
        Group claims by slide number

        Returns:
            Plain dict mapping ``result.claim.slide_number`` to the list of
            results for that slide, in first-seen order.
        """
        claims_by_slide: Dict[int, List] = {}
        for result in validation_results:
            claims_by_slide.setdefault(result.claim.slide_number, []).append(result)
        return claims_by_slide


def validate_market_caps(slide_texts: List[Dict[str, Any]],
                       api_key: Optional[str] = None,
                       save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and validate in-memory slides.

    Args:
        slide_texts: List of slide data with 'slide_number' and 'text' keys
        api_key: OpenRouter API key (optional), passed to MarketCapValidator
        save_report: Whether to save the validation report to file

    Returns:
        Whatever ``MarketCapValidator.validate_from_slides`` returns
    """
    return MarketCapValidator(api_key).validate_from_slides(
        slide_texts, save_report=save_report
    )


def validate_market_caps_from_file(file_path: str,
                                 api_key: Optional[str] = None,
                                 save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and validate a JSON slide file.

    Args:
        file_path: Path to JSON file with slide data
        api_key: OpenRouter API key (optional), passed to MarketCapValidator
        save_report: Whether to save the validation report to file

    Returns:
        Whatever ``MarketCapValidator.validate_from_file`` returns
    """
    return MarketCapValidator(api_key).validate_from_file(
        file_path, save_report=save_report
    )


def validate_market_caps_from_processed(folder_path: str = "processed",
                                       api_key: Optional[str] = None,
                                       save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and validate a folder of slide files.

    Args:
        folder_path: Path to folder containing processed slide files
        api_key: OpenRouter API key (optional), passed to MarketCapValidator
        save_report: Whether to save the validation report to file

    Returns:
        Whatever ``MarketCapValidator.validate_from_processed_folder`` returns
    """
    return MarketCapValidator(api_key).validate_from_processed_folder(
        folder_path, save_report=save_report
    )


if __name__ == "__main__":
    # Demo run: validate the default 'processed' folder and print a summary.
    print("Market Cap Validator - RAG Agent")
    print("=================================")

    # Everything, including the summary printing, stays inside the try so any
    # failure falls through to the usage hints below.
    try:
        outcome = validate_market_caps_from_processed()

        print("\n✅ Validation Complete!")
        print("📊 Summary:")
        stats = outcome['summary']
        print(f"   - Total Claims: {stats['total_claims']}")
        print(f"   - Accurate: {stats['accurate_claims']}")
        print(f"   - Inaccurate: {stats['inaccurate_claims']}")
        print(f"   - Accuracy Rate: {stats['accuracy_rate']:.1f}%")

        saved_to = outcome['report_filename']
        if saved_to:
            print(f"📄 Report saved to: {saved_to}")

    except Exception as err:
        print(f"❌ Error: {err}")
        usage_lines = (
            "\nUsage examples:",
            "1. Place slide data JSON files in 'processed/' folder",
            "2. Run: python -m modules.market_cap_validator",
            "3. Or use the functions directly in your code",
        )
        for line in usage_lines:
            print(line)