#!/usr/bin/env python3
"""
Modular RSS Feed Generator for Canadian Repair Monitoring

This script reads two JSON source files (by default
``../data/repair_keywords.json`` and ``../data/canadian_subreddits.json``,
resolved against the current working directory) and generates both a
Markdown report and an OPML file for consistent, repeatable RSS feed
generation.

Features:
- Reads from external source files for easy maintenance
- Generates clean Markdown output with RSS URLs
- Creates OPML files for easy RSS reader import
- Modular design for future updates and expansion
"""

import json
import urllib.parse
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from email.utils import format_datetime
from pathlib import Path
from typing import Dict, List, Any
from xml.dom import minidom


class ModularRSSGenerator:
    """Build Reddit search RSS feeds for every subreddit x keyword category.

    Attributes set by ``__init__``:
        base_search_url: URL template taking (subreddit name, encoded query).
        keywords_file / subreddits_file: ``Path`` objects of the JSON sources.
        keywords / subreddits: the parsed JSON documents.
        timestamp: ``YYYYmmdd_HHMMSS`` string used in the output file names.
        markdown_file / opml_file: output paths (plain strings).
    """

    def __init__(self, keywords_file="../data/repair_keywords.json",
                 subreddits_file="../data/canadian_subreddits.json"):
        """Load both source files and compute the timestamped output paths.

        Raises:
            FileNotFoundError: if either source file does not exist.
            ValueError: if either source file is not valid JSON.
        """
        self.base_search_url = "https://www.reddit.com/r/{}/search.rss?q={}&sort=new&type=link"
        self.keywords_file = Path(keywords_file)
        self.subreddits_file = Path(subreddits_file)

        # Load source data
        self.keywords = self.load_keywords()
        self.subreddits = self.load_subreddits()

        # Output files
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.markdown_file = f"../feeds/rss_feeds_{self.timestamp}.md"
        self.opml_file = f"../feeds/rss_feeds_{self.timestamp}.opml"

    @staticmethod
    def _load_json(path: Path, label: str) -> Dict[str, Any]:
        """Parse the JSON file at *path*; *label* names it in error messages."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"{label.capitalize()} file not found: {path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {label} file: {e}") from e

    def load_keywords(self) -> Dict[str, Any]:
        """Load keywords from JSON source file"""
        return self._load_json(self.keywords_file, "keywords")

    def load_subreddits(self) -> Dict[str, Any]:
        """Load subreddits from JSON source file"""
        return self._load_json(self.subreddits_file, "subreddits")

    def build_search_query(self, category_data: Dict[str, Any]) -> str:
        """Build Reddit search query from category data.

        Each device and each problem is double-quoted and OR-ed within its
        group. When both groups are present they are parenthesised and
        AND-ed; when only one is present that clause is returned bare.

        Returns:
            The query string, or "" when the category lists neither devices
            nor problems.
        """
        # `or []` also tolerates an explicit JSON null for either list.
        devices = category_data.get("devices") or []
        problems = category_data.get("problems") or []

        device_clause = " OR ".join(f'"{device}"' for device in devices)
        problem_clause = " OR ".join(f'"{problem}"' for problem in problems)

        if device_clause and problem_clause:
            return f"({device_clause}) AND ({problem_clause})"
        # At most one clause is non-empty here (e.g. data recovery has no
        # devices); both empty yields "".
        return device_clause or problem_clause

    def _find_subreddit(self, name: str) -> Dict[str, Any]:
        """Return the first source entry whose "name" is *name*, else {}."""
        for tier in self.subreddits["priorities"].values():
            for entry in tier["subreddits"]:
                if entry["name"] == name:
                    return entry
        return {}

    @staticmethod
    def _preview(items: List[str], limit: int = 5) -> str:
        """Join the first *limit* items with ", ", adding "..." if truncated."""
        suffix = "..." if len(items) > limit else ""
        return ", ".join(items[:limit]) + suffix

    def generate_feed_data(self) -> List[Dict[str, Any]]:
        """Generate RSS feed data for all combinations.

        One record is produced per (subreddit, keyword category) pair whose
        search query is non-empty.

        Returns:
            Feed dicts sorted by priority score (highest first), then by
            subreddit name, then by category key.
        """
        feeds = []

        for priority, priority_data in self.subreddits["priorities"].items():
            for subreddit_data in priority_data["subreddits"]:
                subreddit_name = subreddit_data["name"]

                for category_key, category_data in self.keywords["categories"].items():
                    search_query = self.build_search_query(category_data)
                    if not search_query:
                        continue  # Skip empty queries

                    encoded_query = urllib.parse.quote(search_query)
                    rss_url = self.base_search_url.format(subreddit_name, encoded_query)

                    feeds.append({
                        "priority": priority,
                        "priority_score": subreddit_data.get("priority_score", 0),
                        "subreddit": subreddit_name,
                        # Same fallbacks the Markdown writer already applies,
                        # so an entry missing optional metadata does not abort
                        # the whole run.
                        "subreddit_full": subreddit_data.get("full_name", f"r/{subreddit_name}"),
                        "province": subreddit_data.get("province", "N/A"),
                        "population": subreddit_data.get("population", "N/A"),
                        "category": category_key,
                        "category_name": category_data["name"],
                        "description": category_data["description"],
                        "search_query": search_query,
                        "rss_url": rss_url,
                        "devices": category_data.get("devices") or [],
                        "problems": category_data.get("problems") or [],
                    })

        # Sort by priority score (highest first), then by subreddit
        feeds.sort(key=lambda x: (-x["priority_score"], x["subreddit"], x["category"]))
        return feeds

    def generate_markdown_output(self, feeds: List[Dict[str, Any]]) -> str:
        """Generate clean markdown output.

        Emits a header, a summary with links to the source and OPML files,
        one section per feed (under a heading for each run of equal priority
        and equal subreddit, in the order given), and a static guide.
        """
        output: List[str] = []

        # Header
        output.extend([
            "# 📡 Canadian Repair RSS Feeds",
            "",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Total Feeds:** {len(feeds)}",
            "**Strategy:** Modular generation from source files for consistent results",
            "",
            "---",
            "",
        ])

        # Summary
        opml_basename = Path(self.opml_file).name
        output.extend([
            "## 📊 SUMMARY",
            "",
            f"- **OPML File:** [{opml_basename}]({self.opml_file}) (Import into RSS readers)",
            f"- **Keywords Source:** [{self.keywords_file}]({self.keywords_file})",
            f"- **Subreddits Source:** [{self.subreddits_file}]({self.subreddits_file})",
            "- **Generation Script:** [generate_modular_rss_feeds.py](generate_modular_rss_feeds.py)",
            "",
        ])

        # Group feeds by priority
        current_priority = None
        current_subreddit = None

        for feed in feeds:
            # Priority header
            if feed["priority"] != current_priority:
                current_priority = feed["priority"]
                # A new priority section must re-announce its subreddit even
                # if it has the same name as the last one printed.
                current_subreddit = None
                output.append(f"## {current_priority.upper()} PRIORITY")
                output.append("")

            # Subreddit header
            if feed["subreddit"] != current_subreddit:
                current_subreddit = feed["subreddit"]
                subreddit_data = self._find_subreddit(current_subreddit)
                heading = subreddit_data.get("full_name", f"r/{current_subreddit}")
                output.append(f"### {heading}")
                if "description" in subreddit_data:
                    output.append(f"**{subreddit_data['description']}**")
                output.append(f"- **Province:** {subreddit_data.get('province', 'N/A')}")
                output.append(f"- **Population:** {subreddit_data.get('population', 'N/A')}")
                output.append("")

            # Feed details
            output.extend([
                f"#### {feed['category_name']}",
                "",
                f"**Category:** {feed['category'].replace('_', ' ').title()}",
                f"**Description:** {feed['description']}",
                "",
            ])

            if feed["devices"]:
                output.append(f"**Devices:** {self._preview(feed['devices'])}")
            if feed["problems"]:
                output.append(f"**Problems:** {self._preview(feed['problems'])}")

            output.extend([
                "",
                "**Search Query:**",
                f"```\n{feed['search_query']}\n```",
                "",
                "**RSS URL:**",
                f"```\n{feed['rss_url']}\n```",
                "",
                "---",
                "",
            ])

        # Implementation guide
        output.extend([
            "## 🚀 IMPLEMENTATION GUIDE",
            "",
            "### Phase 1: Start Small (Week 1)",
            "- Import OPML file into your RSS reader",
            "- Subscribe to 5-10 feeds from Toronto/Vancouver",
            "- Monitor daily for repair opportunities",
            "",
            "### Phase 2: Scale Up (Weeks 2-4)",
            "- Add Calgary, Edmonton, Montreal, Ottawa feeds",
            "- Total: ~40 feeds across 6 major cities",
            "",
            "### Phase 3: Full Coverage (Month 2+)",
            "- Add all remaining Canadian cities",
            "- Monitor provincial subreddits",
            "",
            "## 🔧 RSS READER SETUP",
            "",
            "### Recommended Tools:",
            "- **Feedly** - Web/mobile with OPML import",
            "- **Inoreader** - Powerful filtering and organization",
            "- **NetNewsWire** - Native macOS RSS reader",
            "",
            "### Organization Tips:",
            "- Create folders: `Priority → City → Category`",
            "- Set notifications for new posts",
            "- Use starring for follow-up opportunities",
        ])

        return "\n".join(output)

    def generate_opml_output(self, feeds: List[Dict[str, Any]]) -> str:
        """Generate OPML XML output for RSS reader import.

        The outline tree is ``<PRIORITY> PRIORITY`` -> subreddit -> feed.
        Folders are keyed by priority and by (priority, subreddit), so each
        is created once even if *feeds* is not contiguous per priority.

        Returns:
            Pretty-printed OPML text that starts with a UTF-8 XML
            declaration (matching the encoding ``save_outputs`` writes).
        """
        # Create root element
        opml = ET.Element("opml", version="2.0")
        head = ET.SubElement(opml, "head")
        ET.SubElement(head, "title").text = "Canadian Repair RSS Feeds"
        # The value is labelled GMT, so it must be built from UTC rather
        # than local time; format_datetime is also locale-independent.
        ET.SubElement(head, "dateCreated").text = format_datetime(
            datetime.now(timezone.utc), usegmt=True
        )

        body = ET.SubElement(opml, "body")

        priority_outlines: Dict[Any, ET.Element] = {}
        subreddit_outlines: Dict[Any, ET.Element] = {}

        for feed in feeds:
            priority = feed["priority"]
            subreddit = feed["subreddit"]

            # Priority outline
            priority_outline = priority_outlines.get(priority)
            if priority_outline is None:
                label = f"{priority.upper()} PRIORITY"
                priority_outline = ET.SubElement(body, "outline", text=label, title=label)
                priority_outlines[priority] = priority_outline

            # Subreddit outline
            sub_key = (priority, subreddit)
            sub_outline = subreddit_outlines.get(sub_key)
            if sub_outline is None:
                full_name = self._find_subreddit(subreddit).get("full_name", f"r/{subreddit}")
                sub_outline = ET.SubElement(
                    priority_outline, "outline", text=full_name, title=full_name
                )
                subreddit_outlines[sub_key] = sub_outline

            # Feed outline
            ET.SubElement(
                sub_outline,
                "outline",
                text=feed["category_name"],
                title=feed["category_name"],
                type="rss",
                xmlUrl=feed["rss_url"],
                description=feed["description"],
            )

        # Pretty print XML
        pretty = minidom.parseString(ET.tostring(opml)).toprettyxml(indent=" ")

        # Drop minidom's own declaration (it carries no encoding) and any
        # blank lines, then emit an explicit UTF-8 declaration.
        content_lines = [line for line in pretty.split("\n")[1:] if line.strip()]
        return '<?xml version="1.0" encoding="UTF-8"?>\n' + "\n".join(content_lines)

    def save_outputs(self):
        """Generate and save both markdown and OPML outputs.

        Creates the output directories if needed, writes both files as
        UTF-8, and prints a progress summary to stdout.
        """
        print("🔄 Generating RSS feeds from source files...")

        # Generate feed data
        feeds = self.generate_feed_data()
        print(f"✅ Generated {len(feeds)} RSS feeds")

        # Generate outputs
        markdown_content = self.generate_markdown_output(feeds)
        opml_content = self.generate_opml_output(feeds)

        # Save files (the target directory may not exist on a fresh checkout)
        for target, content in (
            (self.markdown_file, markdown_content),
            (self.opml_file, opml_content),
        ):
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            with open(target, "w", encoding="utf-8") as f:
                f.write(content)

        print(f"✅ Saved {self.markdown_file} ({len(markdown_content)} chars)")
        print(f"✅ Saved {self.opml_file} ({len(opml_content)} chars)")
        print("")
        print("📊 FEED SUMMARY:")
        print(f" - Total feeds: {len(feeds)}")
        print(f" - Source files: {self.keywords_file}, {self.subreddits_file}")
        print(" - OPML ready for RSS reader import")
        print("")
        print("🚀 Ready to monitor Canadian repair discussions!")


def main():
    """Run the generator; return 0 on success, 1 after printing any error."""
    try:
        generator = ModularRSSGenerator()
        generator.save_outputs()
    except Exception as e:  # top-level boundary: report and signal failure
        print(f"❌ Error: {e}")
        return 1
    return 0


if __name__ == "__main__":
    # SystemExit works even where the site-provided `exit` is unavailable.
    raise SystemExit(main())