236 lines
9.0 KiB
Python
236 lines
9.0 KiB
Python
#!/usr/bin/env python3
|
|
|
|
"""
Market Cap Validator - Main Interface

This module provides a simple interface to validate market cap claims
from pitch deck slides using RAG search capabilities.
"""
|
|
|
|
import os
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
from .rag_agent import MarketCapRAGAgent
|
|
from .validation_report import ValidationReportGenerator
|
|
|
|
|
|
class MarketCapValidator:
    """
    Main interface for market cap validation using RAG search.

    Holds a ``MarketCapRAGAgent`` (used to validate claims) and a
    ``ValidationReportGenerator`` (used to build and save reports), and
    offers three entry points that differ only in where the slide data
    comes from: an in-memory list, a single JSON file, or a folder of
    JSON files.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the market cap validator.

        Args:
            api_key: OpenRouter API key, forwarded unchanged to
                ``MarketCapRAGAgent`` (if not provided, will use
                environment variable).
        """
        self.rag_agent = MarketCapRAGAgent(api_key)
        self.report_generator = ValidationReportGenerator()

    def validate_from_slides(self, slide_texts: List[Dict[str, Any]],
                             save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from slide text exports.

        Args:
            slide_texts: List of slide data with 'slide_number' and 'text' keys
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary with the keys 'validation_results', 'report',
            'report_filename' (``None`` when ``save_report`` is false) and
            'summary'.
        """
        print("🔍 Starting market cap validation process...")

        # Extract and validate claims
        validation_results = self.rag_agent.validate_all_claims(slide_texts)

        # Generate report
        report = self.report_generator.generate_report(validation_results, slide_texts)

        # Save report if requested
        report_filename = None
        if save_report:
            report_filename = self.report_generator.save_report(report)
            print(f"📄 Validation report saved to: {report_filename}")

        # Prepare summary
        summary = self._generate_summary(validation_results)

        return {
            'validation_results': validation_results,
            'report': report,
            'report_filename': report_filename,
            'summary': summary,
        }

    def validate_from_file(self, file_path: str, save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from a JSON file containing slide texts.

        Args:
            file_path: Path to JSON file with slide data
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file content is not valid JSON.
        """
        # Only reading/parsing the input file is guarded. Errors raised later
        # by the validation pipeline must propagate untouched, otherwise a
        # downstream FileNotFoundError/JSONDecodeError would be misreported
        # as a problem with the input file.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                slide_texts = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {file_path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON file: {e}") from e

        print(f"📁 Loaded slide data from: {file_path}")
        return self.validate_from_slides(slide_texts, save_report)

    def validate_from_processed_folder(self, folder_path: str = "processed",
                                       save_report: bool = True) -> Dict[str, Any]:
        """
        Validate market cap claims from processed slide files.

        Every ``*.json`` file directly inside ``folder_path`` is read in
        sorted filename order. Three layouts are accepted: a top-level list
        of slides, a dict with a 'slides' list, or a single slide dict that
        has a 'text' key. Files that cannot be read or parsed are skipped
        with a warning.

        Args:
            folder_path: Path to folder containing processed slide files
            save_report: Whether to save the validation report to file

        Returns:
            Dictionary containing validation results and report

        Raises:
            ValueError: If no slide data could be collected (including when
                ``folder_path`` is missing or is not a directory).
        """
        slide_texts: List[Dict[str, Any]] = []

        # isdir (not exists): a regular file at folder_path must not reach listdir.
        if os.path.isdir(folder_path):
            # listdir order is arbitrary; sort so slide order is reproducible.
            for filename in sorted(os.listdir(folder_path)):
                if not filename.endswith('.json'):
                    continue

                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (OSError, ValueError) as e:
                    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                    print(f"⚠️ Skipping invalid file {filename}: {e}")
                    continue

                # Handle different JSON structures
                if isinstance(data, list):
                    slide_texts.extend(data)
                elif isinstance(data, dict) and isinstance(data.get('slides'), list):
                    slide_texts.extend(data['slides'])
                elif isinstance(data, dict) and 'text' in data:
                    slide_texts.append(data)

        if not slide_texts:
            raise ValueError(f"No valid slide data found in {folder_path}")

        print(f"📁 Loaded {len(slide_texts)} slides from processed folder")
        return self.validate_from_slides(slide_texts, save_report)

    def _generate_summary(self, validation_results: List) -> Dict[str, Any]:
        """
        Generate a summary of validation results.

        Args:
            validation_results: Result objects exposing ``is_accurate`` and
                ``claim.slide_number``.

        Returns:
            Dictionary with 'total_claims', 'accurate_claims',
            'inaccurate_claims', 'accuracy_rate' (percentage, 0 when there
            are no claims) and 'claims_by_slide'.
        """
        total_claims = len(validation_results)
        accurate_claims = sum(1 for r in validation_results if r.is_accurate)
        inaccurate_claims = total_claims - accurate_claims

        return {
            'total_claims': total_claims,
            'accurate_claims': accurate_claims,
            'inaccurate_claims': inaccurate_claims,
            'accuracy_rate': (accurate_claims / total_claims * 100) if total_claims > 0 else 0,
            'claims_by_slide': self._group_claims_by_slide(validation_results),
        }

    def _group_claims_by_slide(self, validation_results: List) -> Dict[int, List]:
        """
        Group claims by slide number.

        Returns:
            Plain dict mapping ``result.claim.slide_number`` to the list of
            results for that slide, in first-seen order.
        """
        claims_by_slide: Dict[int, List] = {}
        for result in validation_results:
            claims_by_slide.setdefault(result.claim.slide_number, []).append(result)
        return claims_by_slide
|
|
|
|
|
|
def validate_market_caps(slide_texts: List[Dict[str, Any]],
                         api_key: Optional[str] = None,
                         save_report: bool = True) -> Dict[str, Any]:
    """
    One-call helper: validate market cap claims from in-memory slide data.

    Builds a fresh ``MarketCapValidator`` with ``api_key`` and delegates to
    its ``validate_from_slides`` method.

    Args:
        slide_texts: List of slide data with 'slide_number' and 'text' keys
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    return MarketCapValidator(api_key).validate_from_slides(slide_texts, save_report)
|
|
|
|
|
|
def validate_market_caps_from_file(file_path: str,
                                   api_key: Optional[str] = None,
                                   save_report: bool = True) -> Dict[str, Any]:
    """
    One-call helper: validate market cap claims stored in a JSON file.

    Builds a fresh ``MarketCapValidator`` with ``api_key`` and delegates to
    its ``validate_from_file`` method.

    Args:
        file_path: Path to JSON file with slide data
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    return MarketCapValidator(api_key).validate_from_file(file_path, save_report)
|
|
|
|
|
|
def validate_market_caps_from_processed(folder_path: str = "processed",
                                        api_key: Optional[str] = None,
                                        save_report: bool = True) -> Dict[str, Any]:
    """
    One-call helper: validate market cap claims from a processed folder.

    Builds a fresh ``MarketCapValidator`` with ``api_key`` and delegates to
    its ``validate_from_processed_folder`` method.

    Args:
        folder_path: Path to folder containing processed slide files
        api_key: OpenRouter API key (optional)
        save_report: Whether to save the validation report to file

    Returns:
        Dictionary containing validation results and report
    """
    return MarketCapValidator(api_key).validate_from_processed_folder(folder_path, save_report)
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo entry point: validate whatever is in the default 'processed/'
    # folder and print a short summary, or print usage hints on any failure.
    print("Market Cap Validator - RAG Agent")
    print("=================================")

    try:
        # Uses the default folder, default API key lookup, and saves a report.
        results = validate_market_caps_from_processed()

        print("\n✅ Validation Complete!")
        print("📊 Summary:")
        stats = results['summary']
        print(f" - Total Claims: {stats['total_claims']}")
        print(f" - Accurate: {stats['accurate_claims']}")
        print(f" - Inaccurate: {stats['inaccurate_claims']}")
        print(f" - Accuracy Rate: {stats['accuracy_rate']:.1f}%")

        saved_to = results['report_filename']
        if saved_to:
            print(f"📄 Report saved to: {saved_to}")

    except Exception as e:
        # Top-level boundary: report the problem and show how to run the tool.
        print(f"❌ Error: {e}")
        print("\nUsage examples:")
        print("1. Place slide data JSON files in 'processed/' folder")
        print("2. Run: python -m modules.market_cap_validator")
        print("3. Or use the functions directly in your code")
|