rss-feedmonitor/scripts/test-reddit-patterns.js

176 lines
7.6 KiB
JavaScript

/**
* Batch test Reddit query patterns to find what works
*/
import { writeFile } from 'fs/promises';
import { pathToFileURL } from 'url';
import { chromium } from 'playwright';
import { validateQuery } from './playwright-scraper.js';
/**
 * Search patterns exercised by this script.
 *
 * Each entry carries:
 *   - name:     human-readable label used in console output and the report
 *   - query:    the search string handed to validateQuery()
 *   - expected: the author's guess at how well the pattern will perform
 *               ('high' | 'medium' | 'low'); recorded in the report only
 */
const TEST_QUERIES = [
  // --- MacBook: tech support subreddits ---
  {
    name: `MacBook techsupport - won't turn on`,
    query: `site:reddit.com/r/techsupport "macbook" ("won't turn on" OR "dead" OR "no power")`,
    expected: 'high',
  },
  {
    name: `MacBook applehelp - won't charge`,
    query: `site:reddit.com/r/applehelp "macbook" ("won't charge" OR "not charging" OR "battery")`,
    expected: 'high',
  },
  {
    name: 'MacBook techsupport - water damage',
    query: 'site:reddit.com/r/techsupport "macbook" ("spilled" OR "water damage" OR "liquid")',
    expected: 'medium',
  },

  // --- MacBook: city subreddits ---
  {
    name: 'MacBook toronto',
    query: 'site:reddit.com/r/toronto "macbook" "repair"',
    expected: 'low',
  },
  {
    name: 'MacBook vancouver',
    query: 'site:reddit.com/r/vancouver "macbook" "repair"',
    expected: 'low',
  },

  // --- iPhone: tech support subreddits ---
  {
    name: `iPhone applehelp - won't turn on`,
    query: `site:reddit.com/r/applehelp "iphone" ("won't turn on" OR "dead" OR "black screen")`,
    expected: 'high',
  },
  {
    name: `iPhone techsupport - won't charge`,
    query: `site:reddit.com/r/techsupport "iphone" ("won't charge" OR "not charging")`,
    expected: 'medium',
  },

  // --- Gaming consoles ---
  {
    name: 'PS5 techsupport',
    query: `site:reddit.com/r/techsupport "ps5" ("won't turn on" OR "no power" OR "black screen")`,
    expected: 'medium',
  },
  {
    name: 'Switch techsupport',
    query: `site:reddit.com/r/techsupport "nintendo switch" ("won't charge" OR "won't turn on")`,
    expected: 'medium',
  },
  {
    name: 'PS5 r/playstation',
    query: `site:reddit.com/r/playstation "ps5" ("won't turn on" OR "repair")`,
    expected: 'medium',
  },

  // --- Data recovery ---
  {
    name: 'Data recovery techsupport',
    query: `site:reddit.com/r/techsupport ("hard drive" OR "hdd" OR "ssd") ("died" OR "won't mount" OR "lost files")`,
    expected: 'medium',
  },
  {
    name: 'Data recovery datarecovery',
    query: `site:reddit.com/r/datarecovery ("hard drive" OR "lost files" OR "won't mount")`,
    expected: 'high',
  },

  // --- Laptops in general ---
  {
    name: `Laptop techsupport - won't turn on`,
    query: `site:reddit.com/r/techsupport "laptop" ("won't turn on" OR "dead" OR "no power")`,
    expected: 'high',
  },
  {
    name: 'Laptop techsupport - black screen',
    query: 'site:reddit.com/r/techsupport "laptop" ("black screen" OR "no display")',
    expected: 'high',
  },
];
/**
 * Rate a query pattern from its (already defaulted) counts.
 *
 * @param {number} resultCount    - total results found for the query
 * @param {number} relevantCount  - how many of those were judged relevant
 * @param {number} relevanceScore - average relevance score of the results
 * @returns {'EXCELLENT'|'GOOD'|'POOR'|'FAILED'} performance bucket
 */
function classifyPerformance(resultCount, relevantCount, relevanceScore) {
  if (relevantCount >= 5 && relevanceScore >= 6) return 'EXCELLENT';
  if (relevantCount >= 3 && relevanceScore >= 4) return 'GOOD';
  return resultCount > 0 ? 'POOR' : 'FAILED';
}

/**
 * Mean relevance score of a group of result summaries.
 * Entries without a score (e.g. errored queries) count as 0.
 *
 * @param {Array<{relevanceScore?: number}>} entries
 * @returns {number|null} the mean, or null when the group is empty
 *   (avoids the NaN a plain division by zero would produce)
 */
function averageRelevance(entries) {
  if (entries.length === 0) return null;
  const total = entries.reduce((sum, entry) => sum + (entry.relevanceScore || 0), 0);
  return total / entries.length;
}

/**
 * Print one section of the summary (heading + one entry per pattern).
 * Prints nothing when the group is empty.
 *
 * @param {string} heading
 * @param {Array<object>} entries - result summaries to list
 */
function printPatternGroup(heading, entries) {
  if (entries.length === 0) return;
  console.log(heading);
  for (const entry of entries) {
    console.log(`${entry.name}`);
    console.log(` ${entry.resultCount} results, ${entry.relevantCount} relevant, score ${entry.relevanceScore}`);
  }
  console.log(``);
}

/**
 * Run every pattern in TEST_QUERIES through validateQuery(), print a summary
 * to the console and write the detailed results to
 * `reddit-pattern-test-<epoch ms>.json` (relative to the working directory).
 *
 * Queries run sequentially with a randomized 12-15 s pause between them.
 *
 * @returns {Promise<void>}
 * @throws if the browser cannot be launched or the report cannot be written;
 *   failures of individual queries are recorded as 'ERROR' results instead.
 */
async function main() {
  console.log(`\n🔬 Testing ${TEST_QUERIES.length} Reddit query patterns\n`);
  console.log(`This will take ~${Math.round(TEST_QUERIES.length * 15 / 60)} minutes with polite delays\n`);

  // NOTE(review): '--disable-web-security' and the site-isolation switch relax
  // browser protections; presumably needed by the scraper — confirm before reuse.
  const browser = await chromium.launch({
    headless: true,
    slowMo: 50,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--disable-dev-shm-usage',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-web-security',
      '--disable-features=IsolateOrigins,site-per-process'
    ]
  });

  const results = [];
  try {
    for (let i = 0; i < TEST_QUERIES.length; i++) {
      const test = TEST_QUERIES[i];
      console.log(`\n[${i + 1}/${TEST_QUERIES.length}] ${test.name}`);
      // Only show an ellipsis when the query was actually cut off.
      const preview = test.query.length > 80 ? `${test.query.substring(0, 80)}...` : test.query;
      console.log(`Query: ${preview}`);

      try {
        const result = await validateQuery(browser, test.query);
        const resultCount = result.resultCount || 0;
        const relevantCount = result.relevantCount || 0;
        const relevanceScore = result.avgRelevanceScore || 0;
        const summary = {
          name: test.name,
          query: test.query,
          expected: test.expected,
          resultCount,
          relevantCount,
          relevanceScore,
          recentCount: result.recentCount || 0,
          success: result.success,
          performance: classifyPerformance(resultCount, relevantCount, relevanceScore)
        };
        results.push(summary);
        console.log(`✓ Results: ${summary.resultCount}, Relevant: ${summary.relevantCount}, Score: ${summary.relevanceScore} - ${summary.performance}`);
      } catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.log(`✗ Error: ${message}`);
        results.push({
          name: test.name,
          query: test.query,
          expected: test.expected,
          error: message,
          performance: 'ERROR'
        });
      }

      // Polite delay. Deliberately outside the try/catch so that it also
      // applies after a failed query instead of immediately firing the next one.
      if (i < TEST_QUERIES.length - 1) {
        const delay = 12000 + Math.random() * 3000;
        console.log(` Waiting ${Math.round(delay / 1000)}s...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  } finally {
    // Always release the browser, even if the loop above throws unexpectedly.
    await browser.close();
  }

  // Generate report
  console.log(`\n${'='.repeat(80)}`);
  console.log(`TEST RESULTS SUMMARY`);
  console.log(`${'='.repeat(80)}\n`);

  const excellent = results.filter(r => r.performance === 'EXCELLENT');
  const good = results.filter(r => r.performance === 'GOOD');
  const poor = results.filter(r => r.performance === 'POOR');
  const failed = results.filter(r => r.performance === 'FAILED' || r.performance === 'ERROR');

  console.log(`Performance Breakdown:`);
  console.log(` EXCELLENT (≥5 relevant, score ≥6): ${excellent.length}`);
  console.log(` GOOD (≥3 relevant, score ≥4): ${good.length}`);
  console.log(` POOR (has results but low quality): ${poor.length}`);
  console.log(` FAILED (no results or error): ${failed.length}\n`);

  printPatternGroup(`🌟 EXCELLENT Patterns:`, excellent);
  printPatternGroup(`✓ GOOD Patterns:`, good);

  // Save detailed results. One clock read feeds both the file name and the
  // embedded timestamp so the two always agree.
  const now = new Date();
  const reportFile = `reddit-pattern-test-${now.getTime()}.json`;
  await writeFile(reportFile, JSON.stringify({ timestamp: now.toISOString(), results }, null, 2));
  console.log(`\n💾 Detailed results saved to: ${reportFile}\n`);

  // Key findings
  console.log(`KEY FINDINGS:\n`);
  const techSupportQueries = results.filter(r => r.query.includes('techsupport'));
  const cityQueries = results.filter(r => r.query.includes('toronto') || r.query.includes('vancouver'));
  const avgTechSupport = averageRelevance(techSupportQueries);
  const avgCity = averageRelevance(cityQueries);
  const formatAverage = value => (value === null ? 'n/a' : value.toFixed(1));
  const countStrong = entries =>
    entries.filter(r => r.performance === 'EXCELLENT' || r.performance === 'GOOD').length;

  console.log(`1. Tech Support Subreddits:`);
  console.log(` Average Relevance: ${formatAverage(avgTechSupport)}`);
  console.log(` Best Performers: ${countStrong(techSupportQueries)}/${techSupportQueries.length}\n`);
  console.log(`2. City Subreddits:`);
  console.log(` Average Relevance: ${formatAverage(avgCity)}`);
  console.log(` Best Performers: ${countStrong(cityQueries)}/${cityQueries.length}\n`);

  console.log(`3. Recommendation:`);
  if (avgTechSupport === null || avgCity === null) {
    console.log(` Not enough data to compare tech support and city subreddits`);
  } else if (avgTechSupport > avgCity * 1.5) {
    console.log(` ✓ Use tech support subreddits (r/techsupport, r/applehelp)`);
    console.log(` ✓ Consumer language works well ("won't turn on", "dead")`);
    console.log(` ✗ Avoid city-specific subreddits for repair queries`);
  } else {
    // Without this branch the heading above was printed with nothing under it.
    console.log(` No clear advantage for tech support subreddits over city subreddits in this run`);
  }
  console.log(``);
}
// Run main() only when this file is executed directly (not when imported).
// pathToFileURL() produces the same percent-encoded, platform-correct file URL
// form as import.meta.url; naive `file://${path}` concatenation never matches
// on Windows or for paths containing spaces or other escaped characters.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error(error);
    // Signal failure to the shell / CI instead of exiting with status 0.
    process.exitCode = 1;
  });
}