#!/usr/bin/env python3

import os
from datetime import datetime
from typing import Any, Dict, List, Optional

from .rag_agent import ValidationResult, MarketCapClaim
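
# NOTE: the attribute lists below are inferred from how the objects are used
# in this module; both classes are defined in the companion rag_agent module:
#   MarketCapClaim:   company_name, slide_number, claimed_market_cap, raw_text
#   ValidationResult: claim (a MarketCapClaim), is_accurate, discrepancy,
#                     confidence_score, validated_market_cap,
#                     validation_source, rag_search_query, rag_response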


class ValidationReportGenerator:
    """
    Generates comprehensive validation reports for market cap claims,
    with slide source tracking.
    """

    def __init__(self):
        self.report_sections = []

    def generate_report(self, validation_results: List[ValidationResult],
                        slide_texts: List[Dict[str, Any]]) -> str:
        """
        Generate a comprehensive validation report.

        Args:
            validation_results: List of ValidationResult objects
            slide_texts: Original slide text data for context

        Returns:
            Formatted markdown report string
        """
        report = []

        # Header
        report.append(self._generate_header())

        # Executive summary
        report.append(self._generate_executive_summary(validation_results))

        # Detailed results
        report.append(self._generate_detailed_results(validation_results))

        # Slide source analysis
        report.append(self._generate_slide_source_analysis(validation_results, slide_texts))

        # RAG search details
        report.append(self._generate_rag_search_details(validation_results))

        # Recommendations
        report.append(self._generate_recommendations(validation_results))

        return '\n\n'.join(report)

    def _generate_header(self) -> str:
        """Generate the report header."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return f"""# Market Cap Validation Report

**Generated:** {timestamp}
**Report Type:** RAG-Enhanced Validation Analysis
**Validation Method:** OpenRouter Web Search Integration

---
"""

    def _generate_executive_summary(self, results: List[ValidationResult]) -> str:
        """Generate the executive summary section."""
        total_claims = len(results)
        accurate_claims = sum(1 for r in results if r.is_accurate)
        inaccurate_claims = total_claims - accurate_claims
        high_confidence = sum(1 for r in results if r.confidence_score > 0.7)

        accuracy_rate = (accurate_claims / total_claims * 100) if total_claims > 0 else 0

        # Overall assessment: GOOD above 70%, CAUTION below 50%, MIXED in between
        if accuracy_rate > 70:
            assessment = '✅ **GOOD** - Most claims appear accurate'
        elif accuracy_rate < 50:
            assessment = '⚠️ **CAUTION** - Significant discrepancies found'
        else:
            assessment = '🔍 **MIXED** - Some claims require verification'

        return f"""## Executive Summary

### Key Metrics
- **Total Market Cap Claims Analyzed:** {total_claims}
- **Claims Validated as Accurate:** {accurate_claims} ({accuracy_rate:.1f}%)
- **Claims with Discrepancies:** {inaccurate_claims}
- **High Confidence Validations:** {high_confidence}

### Overall Assessment
{assessment}

---
"""

    def _generate_detailed_results(self, results: List[ValidationResult]) -> str:
        """Generate the detailed validation results section."""
        if not results:
            return "## Detailed Validation Results\n\nNo market cap claims found in the analyzed slides.\n\n---"

        report = ["## Detailed Validation Results\n"]

        for i, result in enumerate(results, 1):
            status_icon = "✅" if result.is_accurate else "❌" if result.discrepancy else "⚠️"
            confidence_bar = self._generate_confidence_bar(result.confidence_score)

            report.append(f"""### {status_icon} Claim #{i}: {result.claim.company_name}

**Slide Source:** Slide {result.claim.slide_number}
**Claimed Market Cap:** ${result.claim.claimed_market_cap}
**Raw Text:** `{result.claim.raw_text}`
**Confidence Score:** {confidence_bar} ({result.confidence_score:.2f})

**Validation Results:**
- **Validated Market Cap:** {result.validated_market_cap or 'Not found'}
- **Validation Source:** {result.validation_source}
- **Accuracy Status:** {'✅ Accurate' if result.is_accurate else '❌ Inaccurate' if result.discrepancy else '⚠️ Uncertain'}
""")

            if result.discrepancy:
                report.append(f"- **Discrepancy:** {result.discrepancy}")

            report.append(f"- **RAG Search Query:** `{result.rag_search_query}`")
            report.append("")

        report.append("---")
        return '\n'.join(report)

    def _generate_slide_source_analysis(self, results: List[ValidationResult],
                                        slide_texts: List[Dict[str, Any]]) -> str:
        """Generate the slide source analysis section."""
        report = ["## Slide Source Analysis\n"]

        # Group validation results by slide number
        slide_claims: Dict[int, List[ValidationResult]] = {}
        for result in results:
            slide_claims.setdefault(result.claim.slide_number, []).append(result)

        # Map slide numbers to their original text
        slide_text_map = {s.get('slide_number', 0): s.get('text', '') for s in slide_texts}

        for slide_num in sorted(slide_claims.keys()):
            claims = slide_claims[slide_num]
            slide_text = slide_text_map.get(slide_num, 'No text available')

            report.append(f"""### Slide {slide_num} Analysis

**Claims Found:** {len(claims)}
**Slide Text Preview:** {slide_text[:200]}{'...' if len(slide_text) > 200 else ''}

**Claims Details:**""")

            # Each grouped entry is a ValidationResult, so the claim fields
            # must be read through its .claim attribute
            for res in claims:
                status = "✅ Accurate" if res.is_accurate else "❌ Inaccurate"
                report.append(f"- {res.claim.company_name}: ${res.claim.claimed_market_cap} - {status}")

            report.append("")

        report.append("---")
        return '\n'.join(report)

    def _generate_rag_search_details(self, results: List[ValidationResult]) -> str:
        """Generate the RAG search details section."""
        report = ["## RAG Search Details\n"]

        report.append("### Search Methodology")
        report.append("- **Search Engine:** OpenRouter with Exa integration")
        report.append("- **Model:** Mistral Small with online search enabled")
        report.append("- **Search Focus:** Current market cap data (2024-2025)")
        report.append("- **Validation Threshold:** 80% accuracy tolerance")
        report.append("")

        report.append("### Search Queries Used")
        # Sort the deduplicated queries so report output is deterministic
        unique_queries = sorted(set(r.rag_search_query for r in results))
        for i, query in enumerate(unique_queries, 1):
            report.append(f"{i}. `{query}`")
        report.append("")

        report.append("### Sample RAG Responses")
        for i, result in enumerate(results[:3], 1):  # Show the first 3 responses
            report.append(f"""#### Response #{i}: {result.claim.company_name}
```
{result.rag_response[:300]}{'...' if len(result.rag_response) > 300 else ''}
```""")

        report.append("---")
        return '\n'.join(report)

    def _generate_recommendations(self, results: List[ValidationResult]) -> str:
        """Generate the recommendations section."""
        inaccurate_results = [r for r in results if not r.is_accurate and r.discrepancy]
        high_confidence_results = [r for r in results if r.confidence_score > 0.7]

        report = ["## Recommendations\n"]

        if inaccurate_results:
            report.append("### ⚠️ Claims Requiring Attention")
            for result in inaccurate_results:
                report.append(f"- **Slide {result.claim.slide_number}:** {result.claim.company_name} - {result.discrepancy}")
            report.append("")

        if high_confidence_results:
            report.append("### ✅ High Confidence Validations")
            report.append("The following claims were validated with high confidence:")
            for result in high_confidence_results:
                report.append(f"- **Slide {result.claim.slide_number}:** {result.claim.company_name} - ${result.claim.claimed_market_cap}")
            report.append("")

        report.append("### 📋 General Recommendations")
        report.append("1. **Verify Discrepancies:** Review claims marked as inaccurate with stakeholders")
        report.append("2. **Update Sources:** Consider updating slide sources with more recent data")
        report.append("3. **Regular Validation:** Implement periodic validation of financial claims")
        report.append("4. **Source Attribution:** Always include data sources and dates in financial slides")

        report.append("\n---")
        report.append("*Report generated by Market Cap RAG Validation Agent*")

        return '\n'.join(report)

    def _generate_confidence_bar(self, confidence: float) -> str:
        """Generate a visual confidence bar, e.g. 0.73 -> '[███████░░░]'."""
        # Clamp to [0, 1] so an out-of-range score cannot break the bar
        filled = max(0, min(10, int(confidence * 10)))
        empty = 10 - filled
        return f"[{'█' * filled}{'░' * empty}]"

    def save_report(self, report: str, filename: Optional[str] = None, processed_dir: str = "processed") -> str:
        """Save the report to a file and return its path."""
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"market_cap_validation_report_{timestamp}.md"

        # Create the processed directory if it doesn't exist
        os.makedirs(processed_dir, exist_ok=True)
        filepath = os.path.join(processed_dir, filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(report)

        return filepath
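

# Usage sketch (illustrative; the package path is hypothetical, and because
# this module uses a relative import it must be imported from its package
# rather than run as a script):
#
#     from pipeline.report_generator import ValidationReportGenerator
#
#     generator = ValidationReportGenerator()
#     report = generator.generate_report(validation_results, slide_texts)
#     path = generator.save_report(report)
#     # -> processed/market_cap_validation_report_<timestamp>.md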