#!/usr/bin/env python3
"""
Extract and consolidate keywords from motherboardrepair.ca website

This script reads the sst.yml and keywords.csv files from the motherboard repair
website and extracts all unique repair-related keywords for our RSS feed generator.
"""

import csv
import json
from datetime import date
from pathlib import Path

# Default input/output locations. They are relative paths, so they resolve
# against the current working directory of the process running the script.
SST_PATH = Path("../motherboardrepair.ca/sst.yml")
CSV_PATH = Path("../motherboardrepair.ca/keywords.csv")
OUTPUT_PATH = Path("data/repair_keywords.json")

# Keyword tiers read from every "keywords" mapping in sst.yml.
_KEYWORD_TIERS = ("primary", "secondary")

# (category key, display name, description, device names) in output order.
_CATEGORY_DEFINITIONS = (
    (
        "iphone_repairs",
        "iPhone Repair Requests",
        "Most common iPhone repair requests",
        ("iPhone", "iPhone 12", "iPhone 13", "iPhone 14", "iPhone 15"),
    ),
    (
        "macbook_repairs",
        "MacBook Repair Requests",
        "MacBook hardware repair needs",
        ("MacBook", "MacBook Pro", "MacBook Air"),
    ),
    (
        "ipad_repairs",
        "iPad Repair Requests",
        "iPad repair and maintenance",
        ("iPad", "iPad Pro", "iPad Air", "iPad mini"),
    ),
    (
        "laptop_repairs",
        "Laptop Repair Requests",
        "General laptop repair discussions",
        ("laptop", "computer", "notebook"),
    ),
    (
        "android_repairs",
        "Android Device Repairs",
        "Android/Samsung device repair needs",
        ("Samsung", "Samsung Galaxy", "Galaxy", "Android"),
    ),
    (
        "console_repairs",
        "Gaming Console Repairs",
        "Console repair and maintenance",
        ("PS5", "PS4", "Xbox", "Nintendo Switch", "PlayStation"),
    ),
    (
        "gpu_repairs",
        "GPU/Graphics Card Repairs",
        "Graphics card and GPU repair needs",
        ("GPU", "graphics card", "RTX", "GTX", "NVIDIA"),
    ),
    (
        "data_recovery",
        "Data Recovery Requests",
        "Data recovery and storage repair",
        (),
    ),
    (
        "general_repairs",
        "General Repair Services",
        "General repair service requests",
        (),
    ),
)

# Lower-case substrings that tie a keyword to a device category. Dict order
# matters: the first category with a matching term wins.
_DEVICE_MAPPINGS = {
    "iphone_repairs": ("iphone",),
    "macbook_repairs": ("macbook",),
    "ipad_repairs": ("ipad",),
    "laptop_repairs": ("laptop", "computer", "notebook"),
    "android_repairs": ("samsung", "galaxy", "android"),
    "console_repairs": ("ps5", "ps4", "xbox", "nintendo switch", "playstation"),
    "gpu_repairs": ("gpu", "graphics card", "nvidia", "rtx", "gtx"),
}

# Storage/data-loss phrases; a non-device keyword containing one of these goes
# to "data_recovery" instead of "general_repairs".
_DATA_RECOVERY_INDICATORS = (
    "data recovery", "recover data", "lost files",
    "hard drive failed", "ssd dead", "storage failed",
)

# Phrases that indicate problems/symptoms. Every entry is lower-case because
# keywords are lower-cased before the substring comparison.
_PROBLEM_INDICATORS = (
    "repair", "fix", "broken", "not working", "dead", "no power",
    "won't turn on", "won't boot", "crashed", "frozen", "slow",
    "won't charge", "charging port", "screen broken", "cracked screen",
    "water damage", "liquid damage", "spilled", "dropped",
    "overheating", "loud fan", "not starting", "blue screen",
    "kernel panic", "boot loop", "black screen", "no display",
    "won't connect", "connection issues", "wifi problems",
    "speakers not working", "microphone broken", "camera not working",
    "keyboard not working", "touchpad issues", "battery dead",
    "hard drive failed", "ssd dead", "storage failed",
    "data recovery", "lost files", "recover data",
    "looking for repair", "need repair", "repair shop",
    "repair service", "professional repair", "local repair",
)


def _iter_tier_keywords(keyword_config):
    """Yield the non-empty, stripped keyword strings of a ``keywords`` mapping.

    Each tier ("primary"/"secondary") may hold a single string or a list of
    strings. Anything else (missing tier, ``None``, non-string items) is skipped.
    """
    if not isinstance(keyword_config, dict):
        return
    for tier in _KEYWORD_TIERS:
        value = keyword_config.get(tier)
        # A bare string must be wrapped: iterating it would yield characters.
        candidates = [value] if isinstance(value, str) else value
        if not isinstance(candidates, (list, tuple)):
            continue
        for candidate in candidates:
            if isinstance(candidate, str) and candidate.strip():
                yield candidate.strip()


def _contains_any(text, needles):
    """Return True when any of *needles* occurs as a substring of *text*."""
    return any(needle in text for needle in needles)


def extract_keywords_from_sst(sst_path=None):
    """Extract keywords from sst.yml file

    Collects the "primary" and "secondary" keywords found under
    ``global.keywords`` and under ``pages.<page>.keywords``.

    Args:
        sst_path: Path of the YAML file; defaults to ``SST_PATH``.

    Returns:
        A set of unique keyword strings (empty when the file holds none).

    Raises:
        ImportError: PyYAML is not installed.
        OSError: The file cannot be opened.
    """
    # Imported here because this is the only function that needs PyYAML; the
    # rest of the module stays importable without the third-party package.
    import yaml

    path = SST_PATH if sst_path is None else Path(sst_path)
    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    keywords = set()
    # safe_load returns None for an empty document and may return a non-mapping.
    if not isinstance(data, dict):
        return keywords

    # Extract global keywords
    global_section = data.get('global')
    if isinstance(global_section, dict):
        keywords.update(_iter_tier_keywords(global_section.get('keywords')))

    # Extract page-specific keywords
    pages = data.get('pages')
    if isinstance(pages, dict):
        for config in pages.values():
            if isinstance(config, dict):
                keywords.update(_iter_tier_keywords(config.get('keywords')))

    return keywords


def extract_keywords_from_csv(csv_path=None):
    """Extract keywords from keywords.csv file

    Every non-empty cell of every row is treated as one keyword; surrounding
    whitespace is stripped.

    Args:
        csv_path: Path of the CSV file; defaults to ``CSV_PATH``.

    Returns:
        A set of unique keyword strings.

    Raises:
        OSError: The file cannot be opened.
    """
    path = CSV_PATH if csv_path is None else Path(csv_path)
    keywords = set()

    # newline='' is what the csv module expects; utf-8-sig also reads plain
    # UTF-8 but drops a leading BOM so it cannot stick to the first keyword.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            keywords.update(cell.strip() for cell in row if cell.strip())

    return keywords


def categorize_keywords(keywords):
    """Categorize keywords into logical groups for RSS feeds

    Matching is case-insensitive and substring based. A keyword is assigned to
    at most one category, checked in this order:

    1. the first device category whose terms occur in the keyword;
    2. ``data_recovery`` when it contains a data/storage-loss phrase;
    3. ``general_repairs`` when it contains any other problem phrase.

    Keywords matching none of these are left out. Non-string and blank entries
    are ignored.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        A dict mapping category key to a dict with "name", "description",
        "devices" and "problems"; "problems" is a sorted list of the unique
        (whitespace-stripped) keywords assigned to that category.
    """
    # Built fresh on every call so callers never share the mutable lists.
    categories = {
        key: {
            "name": name,
            "description": description,
            "devices": list(devices),
            "problems": [],
        }
        for key, name, description, devices in _CATEGORY_DEFINITIONS
    }
    matched = {key: set() for key in categories}

    for keyword in keywords:
        if not isinstance(keyword, str):
            continue
        cleaned = keyword.strip()
        if not cleaned:
            continue
        keyword_lower = cleaned.lower()

        # Check if it's a device keyword
        target = next(
            (
                category
                for category, device_terms in _DEVICE_MAPPINGS.items()
                if _contains_any(keyword_lower, device_terms)
            ),
            None,
        )

        # Otherwise check if it's a problem keyword
        if target is None:
            if _contains_any(keyword_lower, _DATA_RECOVERY_INDICATORS):
                target = "data_recovery"
            elif _contains_any(keyword_lower, _PROBLEM_INDICATORS):
                target = "general_repairs"

        if target is not None:
            matched[target].add(cleaned)

    # Remove duplicates and sort
    for key, found in matched.items():
        categories[key]["problems"] = sorted(found)

    return categories


def create_updated_keywords_file(categories, last_updated=None):
    """Create the updated repair_keywords.json file

    Builds the in-memory structure only; nothing is written to disk here.

    Args:
        categories: Category mapping, e.g. the result of ``categorize_keywords``.
        last_updated: Value for the "last_updated" field. Defaults to today's
            date in ISO format (YYYY-MM-DD).

    Returns:
        The dict that is serialized to repair_keywords.json.
    """
    if last_updated is None:
        last_updated = date.today().isoformat()

    # Create the structure expected by our RSS generator
    keywords_data = {
        "description": "Comprehensive repair keywords extracted from motherboardrepair.ca website",
        "version": "2.0",
        "source": "motherboardrepair.ca sst.yml and keywords.csv",
        "last_updated": last_updated,
        "categories": categories,
        "additional_keywords": {
            "urgency_indicators": ["emergency", "urgent", "help needed", "asap", "quick", "fast"],
            "location_indicators": ["local", "near me", "in my area", "downtown", "nearby"],
            "service_types": ["diagnostics", "diagnostic", "troubleshooting", "microsolder", "component repair", "board repair"]
        }
    }

    return keywords_data


def main():
    """Extract, categorize and save the website keywords, printing a summary."""
    print("🔄 Extracting keywords from motherboardrepair.ca...")

    # Extract keywords from both sources
    sst_keywords = extract_keywords_from_sst()
    csv_keywords = extract_keywords_from_csv()

    all_keywords = sst_keywords.union(csv_keywords)
    print(f"✅ Found {len(all_keywords)} unique keywords")

    # Categorize keywords
    categories = categorize_keywords(all_keywords)
    print("✅ Categorized keywords into repair types")

    # Create updated keywords file
    keywords_data = create_updated_keywords_file(categories)

    # Save to our data directory, creating it on a fresh checkout
    output_path = OUTPUT_PATH
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(keywords_data, f, indent=2, ensure_ascii=False)

    print(f"✅ Updated {output_path} with website-extracted keywords")

    # Summary
    total_categorized = sum(len(cat["problems"]) for cat in categories.values())
    print("\n📊 SUMMARY:")
    print(f" - Source keywords: {len(all_keywords)}")
    print(f" - Categorized keywords: {total_categorized}")
    print(f" - Categories: {len(categories)}")
    print(" - Ready for RSS feed generation!")


if __name__ == "__main__":
    main()