technical-screen-2025-10-22/modules/market_cap_validator.py

236 lines
9.0 KiB
Python

#!/usr/bin/env python3
"""
Market Cap Validator - Main Interface
This module provides a simple interface to validate market cap claims
from pitch deck slides using RAG search capabilities.
"""
import os
import json
from typing import List, Dict, Any, Optional
from .rag_agent import MarketCapRAGAgent
from .validation_report import ValidationReportGenerator
class MarketCapValidator:
    """
    Main interface for market cap validation using RAG search.

    The class wires together a ``MarketCapRAGAgent`` (which validates the
    claims) and a ``ValidationReportGenerator`` (which builds and saves the
    report). The three public ``validate_from_*`` methods differ only in
    where the slide data is loaded from; all of them end up in
    :meth:`validate_from_slides`.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the market cap validator.

        Args:
            api_key: OpenRouter API key, forwarded unchanged to
                ``MarketCapRAGAgent`` (per the original documentation the
                agent falls back to an environment variable when omitted).
        """
        self.rag_agent = MarketCapRAGAgent(api_key)
        self.report_generator = ValidationReportGenerator()

    def validate_from_slides(self, slide_texts: List[Dict[str, Any]],
                             save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from slide text exports.

        Args:
            slide_texts: List of slide data with 'slide_number' and 'text' keys
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary with the keys 'validation_results', 'report',
            'report_filename' (``None`` when ``save_report`` is false) and
            'summary'.
        """
        print("🔍 Starting market cap validation process...")

        # Extract and validate claims
        validation_results = self.rag_agent.validate_all_claims(slide_texts)

        # Generate report
        report = self.report_generator.generate_report(validation_results, slide_texts)

        # Save report if requested
        report_filename = None
        if save_report:
            report_filename = self.report_generator.save_report(report)
            print(f"📄 Validation report saved to: {report_filename}")

        return {
            'validation_results': validation_results,
            'report': report,
            'report_filename': report_filename,
            'summary': self._generate_summary(validation_results),
        }

    def validate_from_file(self, file_path: str, save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from a JSON file containing slide texts.

        Args:
            file_path: Path to JSON file with slide data
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If ``file_path`` does not contain valid JSON.
        """
        # Only the load is guarded: errors raised later by the validation or
        # report-saving steps must not be mislabelled as problems with the
        # input file.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                slide_texts = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {file_path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON file: {e}") from e

        print(f"📁 Loaded slide data from: {file_path}")
        return self.validate_from_slides(slide_texts, save_report)

    def validate_from_processed_folder(self, folder_path: str = "processed",
                                       save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from processed slide files.

        Every ``*.json`` entry directly inside ``folder_path`` is read. Three
        layouts are accepted: a top-level list of slides, a dict with a
        'slides' list, or a single slide dict carrying a 'text' key. Files
        that cannot be read or parsed, or that match none of these layouts,
        are reported on stdout and skipped.

        Args:
            folder_path: Path to folder containing processed slide files
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            ValueError: If no slide data could be collected (including when
                ``folder_path`` is missing or is not a directory).
        """
        slide_texts = []

        # isdir (not exists) so that a regular file passed by mistake ends in
        # the ValueError below instead of an OSError from listdir.
        if os.path.isdir(folder_path):
            # listdir order is arbitrary; sort by name so that the slide
            # order (and therefore the report) is reproducible.
            for filename in sorted(os.listdir(folder_path)):
                if not filename.endswith('.json'):
                    continue
                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (OSError, ValueError) as e:
                    # ValueError covers both JSONDecodeError and
                    # UnicodeDecodeError; OSError covers unreadable entries
                    # such as a directory named "*.json".
                    print(f"⚠️ Skipping invalid file {filename}: {e}")
                    continue

                # Handle different JSON structures
                if isinstance(data, list):
                    slide_texts.extend(data)
                elif isinstance(data, dict) and isinstance(data.get('slides'), list):
                    slide_texts.extend(data['slides'])
                elif isinstance(data, dict) and 'text' in data:
                    slide_texts.append(data)
                else:
                    print(f"⚠️ Skipping file with unrecognized structure: {filename}")

        if not slide_texts:
            raise ValueError(f"No valid slide data found in {folder_path}")

        print(f"📁 Loaded {len(slide_texts)} slides from processed folder")
        return self.validate_from_slides(slide_texts, save_report)

    def _generate_summary(self, validation_results: List) -> Dict[str, Any]:
        """
        Generate a summary of validation results.

        Each result is expected to expose ``is_accurate`` and
        ``claim.slide_number``. ``accuracy_rate`` is a percentage and is
        ``0.0`` when there are no results.
        """
        total_claims = len(validation_results)
        accurate_claims = sum(1 for r in validation_results if r.is_accurate)
        accuracy_rate = (accurate_claims / total_claims * 100) if total_claims else 0.0
        return {
            'total_claims': total_claims,
            'accurate_claims': accurate_claims,
            'inaccurate_claims': total_claims - accurate_claims,
            'accuracy_rate': accuracy_rate,
            'claims_by_slide': self._group_claims_by_slide(validation_results),
        }

    def _group_claims_by_slide(self, validation_results: List) -> Dict[int, List]:
        """Group results by ``result.claim.slide_number``, keeping input order."""
        claims_by_slide: Dict[int, List] = {}
        for result in validation_results:
            claims_by_slide.setdefault(result.claim.slide_number, []).append(result)
        return claims_by_slide
def validate_market_caps(slide_texts: List[Dict[str, Any]],
                         api_key: Optional[str] = None,
                         save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and run it on in-memory slides.

    Args:
        slide_texts: List of slide data with 'slide_number' and 'text' keys
        api_key: OpenRouter API key (optional), passed to the validator
        save_report: Whether to save the validation report to file

    Returns:
        The dictionary produced by ``MarketCapValidator.validate_from_slides``
    """
    return MarketCapValidator(api_key).validate_from_slides(slide_texts, save_report)
def validate_market_caps_from_file(file_path: str,
                                   api_key: Optional[str] = None,
                                   save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and run it on a JSON file.

    Args:
        file_path: Path to JSON file with slide data
        api_key: OpenRouter API key (optional), passed to the validator
        save_report: Whether to save the validation report to file

    Returns:
        The dictionary produced by ``MarketCapValidator.validate_from_file``
    """
    return MarketCapValidator(api_key).validate_from_file(file_path, save_report)
def validate_market_caps_from_processed(folder_path: str = "processed",
                                        api_key: Optional[str] = None,
                                        save_report: bool = True) -> Dict[str, Any]:
    """
    One-shot helper: build a MarketCapValidator and run it on a folder.

    Args:
        folder_path: Path to folder containing processed slide files
        api_key: OpenRouter API key (optional), passed to the validator
        save_report: Whether to save the validation report to file

    Returns:
        The dictionary produced by
        ``MarketCapValidator.validate_from_processed_folder``
    """
    return MarketCapValidator(api_key).validate_from_processed_folder(folder_path, save_report)
if __name__ == "__main__":
    # Demo entry point: validate whatever is in the default "processed"
    # folder and print the summary; on any failure print the error followed
    # by short usage hints.
    print("Market Cap Validator - RAG Agent")
    print("=================================")

    try:
        outcome = validate_market_caps_from_processed()
        print("\n✅ Validation Complete!")
        print("📊 Summary:")

        stats = outcome['summary']
        for label, key in (("Total Claims", 'total_claims'),
                           ("Accurate", 'accurate_claims'),
                           ("Inaccurate", 'inaccurate_claims')):
            print(f" - {label}: {stats[key]}")
        print(f" - Accuracy Rate: {stats['accuracy_rate']:.1f}%")

        saved_to = outcome['report_filename']
        if saved_to:
            print(f"📄 Report saved to: {saved_to}")
    except Exception as e:
        # Top-level boundary of the demo script: report and show usage
        # instead of a traceback.
        print(f"❌ Error: {e}")
        for hint in ("\nUsage examples:",
                     "1. Place slide data JSON files in 'processed/' folder",
                     "2. Run: python -m modules.market_cap_validator",
                     "3. Or use the functions directly in your code"):
            print(hint)