# technical-screen-2025-10-22/modules/validation_report.py
# (234 lines, 9.4 KiB, Python)
#!/usr/bin/env python3
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

from .rag_agent import MarketCapClaim, ValidationResult
class ValidationReportGenerator:
    """
    Generates comprehensive validation reports for market cap claims
    with slide source tracking.

    Each section of the markdown report is built by a private
    ``_generate_*`` helper; ``generate_report`` assembles them and
    ``save_report`` persists the result to disk.
    """

    def __init__(self):
        # Currently unused accumulator; kept for backward compatibility
        # with any callers that inspect it.
        self.report_sections = []

    def generate_report(self, validation_results: List[ValidationResult],
                        slide_texts: List[Dict[str, Any]]) -> str:
        """
        Generate a comprehensive validation report.

        Args:
            validation_results: List of ValidationResult objects
            slide_texts: Original slide text data for context

        Returns:
            Formatted markdown report string
        """
        sections = [
            self._generate_header(),
            self._generate_executive_summary(validation_results),
            self._generate_detailed_results(validation_results),
            self._generate_slide_source_analysis(validation_results, slide_texts),
            self._generate_rag_search_details(validation_results),
            self._generate_recommendations(validation_results),
        ]
        return '\n\n'.join(sections)

    def _generate_header(self) -> str:
        """Generate the report header with a generation timestamp."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return f"""# Market Cap Validation Report
**Generated:** {timestamp}
**Report Type:** RAG-Enhanced Validation Analysis
**Validation Method:** OpenRouter Web Search Integration
---
"""

    def _generate_executive_summary(self, results: List[ValidationResult]) -> str:
        """Generate the executive summary section with headline metrics."""
        total_claims = len(results)
        accurate_claims = sum(1 for r in results if r.is_accurate)
        inaccurate_claims = total_claims - accurate_claims
        high_confidence = sum(1 for r in results if r.confidence_score > 0.7)
        # Guard against division by zero when no claims were found.
        accuracy_rate = (accurate_claims / total_claims * 100) if total_claims > 0 else 0
        return f"""## Executive Summary
### Key Metrics
- **Total Market Cap Claims Analyzed:** {total_claims}
- **Claims Validated as Accurate:** {accurate_claims} ({accuracy_rate:.1f}%)
- **Claims with Discrepancies:** {inaccurate_claims}
- **High Confidence Validations:** {high_confidence}
### Overall Assessment
{'✅ **GOOD** - Most claims appear accurate' if accuracy_rate > 70 else '⚠️ **CAUTION** - Significant discrepancies found' if accuracy_rate < 50 else '🔍 **MIXED** - Some claims require verification'}
---
"""

    def _generate_detailed_results(self, results: List[ValidationResult]) -> str:
        """Generate the per-claim detailed validation results section."""
        if not results:
            return "## Detailed Results\n\nNo market cap claims found in the analyzed slides.\n\n---"
        report = ["## Detailed Validation Results\n"]
        for i, result in enumerate(results, 1):
            # BUG FIX: the first two status icons had been stripped to empty
            # strings; restore ✅ (accurate) / ❌ (known discrepancy) / ⚠️
            # (uncertain), matching the Accuracy Status line below.
            status_icon = "✅" if result.is_accurate else "❌" if result.discrepancy else "⚠️"
            confidence_bar = self._generate_confidence_bar(result.confidence_score)
            report.append(f"""### {status_icon} Claim #{i}: {result.claim.company_name}
**Slide Source:** Slide {result.claim.slide_number}
**Claimed Market Cap:** ${result.claim.claimed_market_cap}
**Raw Text:** `{result.claim.raw_text}`
**Confidence Score:** {confidence_bar} ({result.confidence_score:.2f})
**Validation Results:**
- **Validated Market Cap:** {result.validated_market_cap or 'Not found'}
- **Validation Source:** {result.validation_source}
- **Accuracy Status:** {'✅ Accurate' if result.is_accurate else '❌ Inaccurate' if result.discrepancy else '⚠️ Uncertain'}
""")
            if result.discrepancy:
                report.append(f"- **Discrepancy:** {result.discrepancy}")
            report.append(f"- **RAG Search Query:** `{result.rag_search_query}`")
            report.append("")
        report.append("---")
        return '\n'.join(report)

    def _generate_slide_source_analysis(self, results: List[ValidationResult],
                                        slide_texts: List[Dict[str, Any]]) -> str:
        """Generate the per-slide source analysis section."""
        report = ["## Slide Source Analysis\n"]
        # Group validation results by originating slide number.
        slide_claims: Dict[int, List[ValidationResult]] = {}
        for result in results:
            slide_claims.setdefault(result.claim.slide_number, []).append(result)
        # Map slide number -> raw slide text for the preview line.
        slide_text_map = {s.get('slide_number', 0): s.get('text', '') for s in slide_texts}
        for slide_num in sorted(slide_claims):
            slide_results = slide_claims[slide_num]
            slide_text = slide_text_map.get(slide_num, 'No text available')
            report.append(f"""### Slide {slide_num} Analysis
**Claims Found:** {len(slide_results)}
**Slide Text Preview:** {slide_text[:200]}{'...' if len(slide_text) > 200 else ''}
**Claims Details:**""")
            for res in slide_results:
                # BUG FIX: the grouped items are ValidationResult objects, so
                # read claim fields via res.claim and the status directly from
                # res.is_accurate. The old code accessed .company_name on a
                # ValidationResult (AttributeError) and compared a
                # ValidationResult to a MarketCapClaim (never equal).
                status = "✅ Accurate" if res.is_accurate else "❌ Inaccurate"
                report.append(f"- {res.claim.company_name}: ${res.claim.claimed_market_cap} - {status}")
            report.append("")
        report.append("---")
        return '\n'.join(report)

    def _generate_rag_search_details(self, results: List[ValidationResult]) -> str:
        """Generate the RAG search methodology / queries / responses section."""
        report = ["## RAG Search Details\n"]
        report.append("### Search Methodology")
        report.append("- **Search Engine:** OpenRouter with Exa integration")
        report.append("- **Model:** Mistral Small with online search enabled")
        report.append("- **Search Focus:** Current market cap data (2024-2025)")
        report.append("- **Validation Threshold:** 80% accuracy tolerance")
        report.append("")
        report.append("### Search Queries Used")
        # De-duplicate while preserving first-seen order so the report is
        # deterministic (list(set(...)) produced an arbitrary ordering).
        unique_queries = list(dict.fromkeys(r.rag_search_query for r in results))
        for i, query in enumerate(unique_queries, 1):
            report.append(f"{i}. `{query}`")
        report.append("")
        report.append("### Sample RAG Responses")
        for i, result in enumerate(results[:3], 1):  # Show first 3 responses
            report.append(f"""#### Response #{i}: {result.claim.company_name}
```
{result.rag_response[:300]}{'...' if len(result.rag_response) > 300 else ''}
```""")
        report.append("---")
        return '\n'.join(report)

    def _generate_recommendations(self, results: List[ValidationResult]) -> str:
        """Generate the recommendations section."""
        inaccurate_results = [r for r in results if not r.is_accurate and r.discrepancy]
        high_confidence_results = [r for r in results if r.confidence_score > 0.7]
        report = ["## Recommendations\n"]
        if inaccurate_results:
            report.append("### ⚠️ Claims Requiring Attention")
            for result in inaccurate_results:
                report.append(f"- **Slide {result.claim.slide_number}:** {result.claim.company_name} - {result.discrepancy}")
            report.append("")
        if high_confidence_results:
            report.append("### ✅ High Confidence Validations")
            report.append("The following claims were validated with high confidence:")
            for result in high_confidence_results:
                report.append(f"- **Slide {result.claim.slide_number}:** {result.claim.company_name} - ${result.claim.claimed_market_cap}")
            report.append("")
        report.append("### 📋 General Recommendations")
        report.append("1. **Verify Discrepancies:** Review claims marked as inaccurate with stakeholders")
        report.append("2. **Update Sources:** Consider updating slide sources with more recent data")
        report.append("3. **Regular Validation:** Implement periodic validation of financial claims")
        report.append("4. **Source Attribution:** Always include data sources and dates in financial slides")
        report.append("\n---")
        report.append("*Report generated by Market Cap RAG Validation Agent*")
        return '\n'.join(report)

    def _generate_confidence_bar(self, confidence: float) -> str:
        """Return a 10-segment visual bar, e.g. ``[███████░░░]`` for 0.7.

        BUG FIX: the fill characters had been stripped to empty strings,
        so the bar rendered as ``[]`` regardless of the confidence score.
        """
        filled = int(confidence * 10)
        empty = 10 - filled
        return f"[{'█' * filled}{'░' * empty}]"

    def save_report(self, report: str, filename: Optional[str] = None,
                    processed_dir: str = "processed") -> str:
        """Save the report to ``processed_dir`` and return the file path.

        Args:
            report: Rendered markdown report text.
            filename: Target file name; a timestamped default is used if None.
            processed_dir: Output directory, created if missing.

        Returns:
            Path of the written report file.
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"market_cap_validation_report_{timestamp}.md"
        # Create the output directory if it doesn't exist.
        os.makedirs(processed_dir, exist_ok=True)
        filepath = os.path.join(processed_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(report)
        return filepath