227 lines
8.7 KiB
Python
227 lines
8.7 KiB
Python
#!/usr/bin/env python3
"""
Extract and consolidate keywords from the motherboardrepair.ca website.

This script reads the sst.yml and keywords.csv files from the motherboard repair
website and extracts all unique repair-related keywords for our RSS feed generator.
"""
|
|
|
|
import yaml
|
|
import csv
|
|
import json
|
|
from pathlib import Path
|
|
|
|
def extract_keywords_from_sst(sst_path=None):
    """Extract the unique keywords declared in the site's ``sst.yml`` file.

    Keywords are collected from ``global.keywords`` and from every
    ``pages.<page>.keywords`` mapping, looking at the ``primary`` and
    ``secondary`` entries of each.  An entry may be either a single string
    or a list of strings; both shapes are accepted in both places.

    Args:
        sst_path: Optional path to the YAML file.  Defaults to
            ``../motherboardrepair.ca/sst.yml`` (resolved against the
            current working directory).

    Returns:
        A set of keyword strings.  Empty when the file has no keyword
        sections (or is an empty document).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    if sst_path is None:
        sst_path = Path("../motherboardrepair.ca/sst.yml")

    with open(sst_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document; normalise that to an
        # empty mapping so the lookups below do not raise TypeError.
        data = yaml.safe_load(f) or {}

    keywords = set()

    def _add_keywords(section):
        """Add the primary/secondary keywords of one ``keywords`` mapping."""
        if not isinstance(section, dict):
            return
        for category in ('primary', 'secondary'):
            value = section.get(category)
            if isinstance(value, str):
                # A bare string is ONE keyword; set.update() would split it
                # into individual characters.
                keywords.add(value)
            elif isinstance(value, (list, tuple)):
                # Ignore non-string items (numbers, nested structures):
                # downstream code calls str methods on every keyword.
                keywords.update(item for item in value if isinstance(item, str))

    if not isinstance(data, dict):
        return keywords

    # Global keywords
    global_section = data.get('global')
    if isinstance(global_section, dict):
        _add_keywords(global_section.get('keywords'))

    # Page-specific keywords
    pages = data.get('pages')
    if isinstance(pages, dict):
        for config in pages.values():
            # A page declared with no body loads as None; skip it.
            if isinstance(config, dict):
                _add_keywords(config.get('keywords'))

    return keywords
|
|
|
|
def extract_keywords_from_csv(csv_path=None):
    """Extract the unique keywords listed in the site's ``keywords.csv`` file.

    Every cell of every row is treated as one keyword.  Cells are stripped
    of surrounding whitespace and empty cells are skipped, so padding or
    trailing commas in the file do not produce bogus or duplicate keywords.

    Args:
        csv_path: Optional path to the CSV file.  Defaults to
            ``../motherboardrepair.ca/keywords.csv`` (resolved against the
            current working directory).

    Returns:
        A set of non-empty, whitespace-trimmed keyword strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    if csv_path is None:
        csv_path = Path("../motherboardrepair.ca/keywords.csv")

    keywords = set()

    # newline='' is what the csv module expects so that it can handle line
    # endings inside quoted fields itself; 'utf-8-sig' transparently drops a
    # leading BOM (and reads BOM-less UTF-8 unchanged) so the first keyword
    # is not prefixed with '\ufeff'.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            for cell in row:
                cell = cell.strip()
                if cell:
                    keywords.add(cell)

    return keywords
|
|
|
|
def categorize_keywords(keywords):
    """Categorize keywords into logical groups for RSS feeds.

    Each keyword is lowercased and stripped, then matched by substring in
    this order (first match wins):

    1. Device terms -> that device's category (``iphone_repairs``,
       ``macbook_repairs``, ...), checked in the order of the device table.
    2. Data-recovery / storage-failure terms -> ``data_recovery``.
    3. Any other problem indicator -> ``general_repairs``.

    Keywords matching none of the above are left out of the result.

    Args:
        keywords: Iterable of keyword strings.  Non-string items are ignored.

    Returns:
        A dict mapping category id to a dict with the keys ``name``,
        ``description``, ``devices`` and ``problems``.  ``problems`` is a
        sorted, de-duplicated list of the original (un-normalised) keywords
        assigned to that category.
    """
    categories = {
        "iphone_repairs": {
            "name": "iPhone Repair Requests",
            "description": "Most common iPhone repair requests",
            "devices": ["iPhone", "iPhone 12", "iPhone 13", "iPhone 14", "iPhone 15"],
            "problems": []
        },
        "macbook_repairs": {
            "name": "MacBook Repair Requests",
            "description": "MacBook hardware repair needs",
            "devices": ["MacBook", "MacBook Pro", "MacBook Air"],
            "problems": []
        },
        "ipad_repairs": {
            "name": "iPad Repair Requests",
            "description": "iPad repair and maintenance",
            "devices": ["iPad", "iPad Pro", "iPad Air", "iPad mini"],
            "problems": []
        },
        "laptop_repairs": {
            "name": "Laptop Repair Requests",
            "description": "General laptop repair discussions",
            "devices": ["laptop", "computer", "notebook"],
            "problems": []
        },
        "android_repairs": {
            "name": "Android Device Repairs",
            "description": "Android/Samsung device repair needs",
            "devices": ["Samsung", "Samsung Galaxy", "Galaxy", "Android"],
            "problems": []
        },
        "console_repairs": {
            "name": "Gaming Console Repairs",
            "description": "Console repair and maintenance",
            "devices": ["PS5", "PS4", "Xbox", "Nintendo Switch", "PlayStation"],
            "problems": []
        },
        "gpu_repairs": {
            "name": "GPU/Graphics Card Repairs",
            "description": "Graphics card and GPU repair needs",
            "devices": ["GPU", "graphics card", "RTX", "GTX", "NVIDIA"],
            "problems": []
        },
        "data_recovery": {
            "name": "Data Recovery Requests",
            "description": "Data recovery and storage repair",
            "devices": [],
            "problems": []
        },
        "general_repairs": {
            "name": "General Repair Services",
            "description": "General repair service requests",
            "devices": [],
            "problems": []
        }
    }

    # All indicator terms are lowercase because they are compared against the
    # lowercased keyword; a mixed-case term could never match.

    # Terms that indicate data loss or storage failure.  These are checked
    # before the general indicators because phrases such as "data recovery
    # repair" also contain the generic term "repair".
    data_recovery_indicators = [
        "hard drive failed", "ssd dead", "storage failed",
        "data recovery", "lost files", "recover data",
    ]

    # Keywords that indicate general problems/symptoms
    problem_indicators = [
        "repair", "fix", "broken", "not working", "dead", "no power",
        "won't turn on", "won't boot", "crashed", "frozen", "slow",
        "won't charge", "charging port", "screen broken", "cracked screen",
        "water damage", "liquid damage", "spilled", "dropped",
        "overheating", "loud fan", "not starting", "blue screen",
        "kernel panic", "boot loop", "black screen", "no display",
        "won't connect", "connection issues", "wifi problems",
        "speakers not working", "microphone broken", "camera not working",
        "keyboard not working", "touchpad issues", "battery dead",
        "looking for repair", "need repair", "repair shop", "repair service",
        "professional repair", "local repair",
    ]

    # Device-specific keywords mapping (insertion order = match priority)
    device_mappings = {
        "iphone_repairs": ["iphone"],
        "macbook_repairs": ["macbook"],
        "ipad_repairs": ["ipad"],
        "laptop_repairs": ["laptop", "computer", "notebook"],
        "android_repairs": ["samsung", "galaxy", "android"],
        "console_repairs": ["ps5", "ps4", "xbox", "nintendo switch", "playstation"],
        "gpu_repairs": ["gpu", "graphics card", "nvidia", "rtx", "gtx"],
    }

    # Collect into sets so duplicates are dropped as we go.
    assigned = {category: set() for category in categories}

    for keyword in keywords:
        if not isinstance(keyword, str):
            continue
        keyword_lower = keyword.lower().strip()

        target = None

        # 1. Device keywords take priority.
        for category, device_terms in device_mappings.items():
            if any(device in keyword_lower for device in device_terms):
                target = category
                break

        # 2./3. Otherwise route by the kind of problem described.
        if target is None:
            if any(term in keyword_lower for term in data_recovery_indicators):
                target = "data_recovery"
            elif any(term in keyword_lower for term in problem_indicators):
                target = "general_repairs"

        if target is not None:
            assigned[target].add(keyword)

    for category, found in assigned.items():
        categories[category]["problems"] = sorted(found)

    return categories
|
|
|
|
def create_updated_keywords_file(categories, last_updated=None):
    """Build the data structure written to ``repair_keywords.json``.

    Despite the name, this function does not touch the filesystem; it only
    wraps ``categories`` in the metadata envelope and returns it.

    Args:
        categories: Category mapping to embed under the ``"categories"`` key
            (stored as-is, not copied).
        last_updated: Optional date string for the ``"last_updated"`` field.
            Defaults to today's date in ISO format (``YYYY-MM-DD``) so a
            regenerated file does not carry a stale, hard-coded date.

    Returns:
        A dict with the keys ``description``, ``version``, ``source``,
        ``last_updated``, ``categories`` and ``additional_keywords``.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    from datetime import date

    if last_updated is None:
        last_updated = date.today().isoformat()

    # Create the structure expected by our RSS generator
    keywords_data = {
        "description": "Comprehensive repair keywords extracted from motherboardrepair.ca website",
        "version": "2.0",
        "source": "motherboardrepair.ca sst.yml and keywords.csv",
        "last_updated": last_updated,
        "categories": categories,
        "additional_keywords": {
            "urgency_indicators": ["emergency", "urgent", "help needed", "asap", "quick", "fast"],
            "location_indicators": ["local", "near me", "in my area", "downtown", "nearby"],
            "service_types": ["diagnostics", "diagnostic", "troubleshooting", "microsolder", "component repair", "board repair"]
        }
    }

    return keywords_data
|
|
|
|
def main():
    """Run the full extraction pipeline and write ``data/repair_keywords.json``.

    Steps: read keywords from the YAML and CSV sources, merge them,
    categorize them, wrap them in the output structure, write the JSON file
    and print a short summary.  All paths are relative to the current
    working directory.
    """
    print("🔄 Extracting keywords from motherboardrepair.ca...")

    # Extract keywords from both sources
    sst_keywords = extract_keywords_from_sst()
    csv_keywords = extract_keywords_from_csv()

    all_keywords = sst_keywords.union(csv_keywords)
    print(f"✅ Found {len(all_keywords)} unique keywords")

    # Categorize keywords
    categories = categorize_keywords(all_keywords)
    print("✅ Categorized keywords into repair types")

    # Create updated keywords file
    keywords_data = create_updated_keywords_file(categories)

    # Save to our data directory
    output_path = Path("data/repair_keywords.json")
    # open(..., 'w') does not create missing directories; make sure the
    # target directory exists so a fresh checkout does not fail here.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(keywords_data, f, indent=2, ensure_ascii=False)

    print(f"✅ Updated {output_path} with website-extracted keywords")

    # Summary
    total_categorized = sum(len(cat["problems"]) for cat in categories.values())
    print("\n📊 SUMMARY:")
    print(f" - Source keywords: {len(all_keywords)}")
    print(f" - Categorized keywords: {total_categorized}")
    print(f" - Categories: {len(categories)}")
    print(" - Ready for RSS feed generation!")
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()