174 lines
6.2 KiB
Python
174 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import requests
|
|
import json
|
|
|
|
|
|
def clean_markdown_text(text):
    """Reduce markdown/LaTeX-flavoured text to plain text.

    The steps run in this order:

    1. Remove LaTeX commands (``\\cmd{...}`` and bare ``\\cmd``) and inline
       ``$...$`` math.
    2. Unwrap bold, italic and inline-code markers, keeping the inner text.
    3. Drop ATX header markers (``#`` .. ``######``) at the start of a line.
    4. Replace every character outside a small whitelist (word characters,
       whitespace and basic punctuation) with a space.
    5. Collapse runs of spaces/tabs and cap blank lines at one, so paragraph
       breaks survive.

    Args:
        text: The text to clean. Any falsy value (``None``, ``""``) gives ``""``.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    if not text:
        return ""

    # Remove LaTeX commands and math expressions
    text = re.sub(r'\\[a-zA-Z]+\{[^}]*\}', '', text)  # \command{content}
    # Inline math only. The opening "$" must be followed by a non-space, the
    # closing "$" must be preceded by a non-space and must not be followed by
    # a digit. Without these guards, currency amounts such as
    # "$5M at a $10M cap" would be treated as one math span and deleted.
    text = re.sub(r'\$(?=\S)[^$]*(?<=\S)\$(?!\d)', '', text)
    text = re.sub(r'\\[a-zA-Z]+', '', text)  # remaining \commands

    # Remove markdown formatting but keep the text
    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)  # bold **text**
    text = re.sub(r'\*([^*]+)\*', r'\1', text)  # italic *text*
    text = re.sub(r'`([^`]+)`', r'\1', text)  # code `text`
    # Header markers are only markers at the start of a line; a "#" elsewhere
    # ("#1", "C#") is ordinary text and is on the whitelist below.
    text = re.sub(r'^[ \t]*#{1,6}(?:[ \t]+|$)', '', text, flags=re.MULTILINE)

    # Remove special characters but keep basic punctuation
    text = re.sub(
        r'[^\w\s\.\,\!\?\;\:\-\(\)\[\]\"\'\/\&\%\@\#\$\+\=\<\>]', ' ', text
    )

    # Normalise whitespace. Newlines are handled separately from horizontal
    # whitespace so that paragraph breaks are not flattened into spaces.
    text = re.sub(r'[^\S\n]+', ' ', text)  # runs of spaces/tabs/CR -> one space
    text = re.sub(r' ?\n ?', '\n', text)  # no stray spaces around newlines
    text = re.sub(r'\n{3,}', '\n\n', text)  # at most one blank line

    return text.strip()
|
|
|
|
|
|
def create_slide_markdown(slide_data, analysis_results, slide_num, slide_text=""):
    """Build the markdown section for one slide.

    The section consists of a ``# Slide N`` heading, an optional
    ``## Text Content`` part (only when ``slide_text`` is non-blank), an
    ``## Agentic Analysis`` part with one ``###`` entry per analysis result,
    and a closing ``---`` rule. Slide text and analysis text are passed
    through ``clean_markdown_text`` before being embedded.

    Args:
        slide_data: Not referenced by the code visible here (see note below).
        analysis_results: Mapping whose values are dicts providing the
            ``'analysis'`` and ``'agent'`` keys read below.
        slide_num: Value interpolated into the slide heading.
        slide_text: Optional raw text of the slide.

    Returns:
        The assembled markdown string.
    """
    # NOTE(review): slide_data is unused and the heading template contains an
    # empty line between two blank lines. That line may originally have held
    # an image reference (create_text_only_markdown strips
    # "![...](slides/...)" lines) - confirm against the original file.
    sections = [f"# Slide {slide_num}\n\n\n\n"]

    # Add text content if available
    if slide_text and slide_text.strip():
        plain_slide_text = clean_markdown_text(slide_text)
        sections.append(f"## Text Content\n\n{plain_slide_text}\n\n")

    sections.append("## Agentic Analysis\n\n")

    for _prompt_key, result in analysis_results.items():
        # Analysis text is cleaned first, then the agent name is read,
        # matching the original evaluation order.
        plain_analysis = clean_markdown_text(result['analysis'])
        sections.append(f"### {result['agent']}\n\n{plain_analysis}\n\n")

    # Horizontal rule separating this slide from the next one
    sections.append("---\n\n")
    return "".join(sections)
|
|
|
|
|
|
def create_text_only_markdown(markdown_content):
    """Return a text-only rendition of ``markdown_content`` for API submission.

    Image embeds and "View full size" links pointing into ``slides/`` are
    deleted, as are ``---`` separator lines. Runs of three or more newlines
    are then reduced to two, and the result is passed through
    ``clean_markdown_text``.

    Args:
        markdown_content: The full markdown document as a string.

    Returns:
        The cleaned text, stripped of leading and trailing whitespace.
    """
    # (pattern, flags) pairs applied in order; every match is deleted.
    removals = (
        (r'!\[.*?\]\(slides/.*?\)\n', 0),  # image embedding lines
        (r'\*\[View full size: slides/.*?\]\(slides/.*?\)\*\n', 0),  # image link lines
        (r'^---\n', re.MULTILINE),  # slide separator rules
    )

    stripped = markdown_content
    for pattern, flags in removals:
        stripped = re.sub(pattern, '', stripped, flags=flags)

    # Clean up extra newlines left behind by the removals
    stripped = re.sub(r'\n{3,}', '\n\n', stripped)

    # Final plaintext pass
    return clean_markdown_text(stripped).strip()
|
|
|
|
|
|
def _extract_haste_key(body):
    """Return the document key contained in a haste upload response body.

    Accepts a JSON object with a ``"key"`` member (with or without
    whitespace), a JSON string, or a bare token; surrounding double quotes
    on a bare token are dropped.
    """
    stripped = body.strip()
    try:
        payload = json.loads(stripped)
    except ValueError:
        # Not JSON at all: treat the body as the token itself.
        return stripped.strip('"')
    if isinstance(payload, dict) and 'key' in payload:
        return str(payload['key'])
    if isinstance(payload, str):
        return payload
    return stripped.strip('"')


def _upload_raw_markdown(text_only_markdown):
    """POST the text to haste.nixc.us; return its URL, or None on a non-200 reply."""
    raw_response = requests.post(
        "https://haste.nixc.us/documents",
        data=text_only_markdown.encode('utf-8'),
        headers={"Content-Type": "text/plain"},
        timeout=30,
    )
    if raw_response.status_code != 200:
        print(f" ⚠️ Raw markdown upload failed with status {raw_response.status_code}")
        return None

    raw_token = _extract_haste_key(raw_response.text)
    if not raw_token:
        # An empty key would give a URL that points at nothing; let the
        # caller report it as a failure instead of a success.
        raise ValueError("empty document key in haste response")

    print(" ✅ Raw markdown URL created")
    return f"https://haste.nixc.us/{raw_token}"


def _request_html_url(text_only_markdown, document_title):
    """Ask md.colinknapp.com for an HTML rendering; return its URL or None."""
    api_data = {
        "markdown": text_only_markdown,
        "format": "html",
        "template": "playful",
        "title": f"Pitch Deck Analysis: {document_title}",
        "subtitle": "AI-Generated Analysis with Agentic Insights",
        "contact": "Generated by Pitch Deck Parser",
        "send_to_haste": True,
    }
    response = requests.post(
        "https://md.colinknapp.com/api/convert",
        headers={"Content-Type": "application/json"},
        data=json.dumps(api_data),
        timeout=30,
    )
    if response.status_code != 200:
        print(f" ⚠️ HTML API request failed with status {response.status_code}")
        return None

    result = response.json()
    if 'haste_url' not in result:
        print(" ⚠️ API response missing haste_url")
        return None

    # Re-point a haste.nixc.us link at the md.colinknapp.com viewer; any
    # other URL is passed through unchanged.
    haste_url = result['haste_url']
    if 'haste.nixc.us/' in haste_url:
        token = haste_url.split('haste.nixc.us/')[-1]
        html_url = f"https://md.colinknapp.com/haste/{token}"
    else:
        html_url = haste_url
    print(" ✅ HTML version URL created")
    return html_url


def send_to_api_and_get_haste_link(markdown_content, document_title):
    """Send markdown to API and get both raw markdown and HTML URLs.

    The markdown is first reduced with ``create_text_only_markdown``. The
    result is uploaded to haste.nixc.us and, independently, submitted to
    md.colinknapp.com for HTML conversion. Each step is best-effort: a
    failure is printed and leaves that step's URL as ``None`` without
    affecting the other step.

    Args:
        markdown_content: Full markdown document to publish.
        document_title: Title interpolated into the HTML page title.

    Returns:
        A ``(raw_haste_url, html_url)`` tuple; either element may be ``None``.
        ``(None, None)`` is returned if preparation itself fails.
    """
    # The json module is deliberately not imported inside this function. A
    # function-scope import makes the name local to the whole function, so
    # any use on a path that skipped the import raises UnboundLocalError.
    try:
        print("Sending to API for URLs...")

        # Create text-only version for API
        text_only_markdown = create_text_only_markdown(markdown_content)

        # First, send raw markdown to haste.nixc.us
        raw_haste_url = None
        try:
            print(" 📝 Creating raw markdown URL...")
            raw_haste_url = _upload_raw_markdown(text_only_markdown)
        except Exception as e:  # best-effort boundary: report and continue
            print(f" ⚠️ Failed to create raw markdown URL: {e}")

        # Then, send to md.colinknapp.com for HTML version
        html_url = None
        try:
            print(" 🎨 Creating HTML version URL...")
            html_url = _request_html_url(text_only_markdown, document_title)
        except Exception as e:  # best-effort boundary: report and continue
            print(f" ⚠️ Failed to create HTML URL: {e}")

        return raw_haste_url, html_url

    except Exception as e:
        print(f"⚠️ Failed to send to API: {e}")
        return None, None
|