# A quick side script for inspecting the crawl results.
# NOT used by the main workflow.

import asyncio
import json
import os

from playwright.async_api import async_playwright
from crawl4ai import BrowserConfig, AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer

# Input file read by this script; each entry is indexed with ['content'] below.
RESULTS_PATH = "crawl_results/successful_pages.json"

# Text whose presence marks a page as an "invalid password" page.
INVALID_PASSWORD_MARKER = "password is invalid"

# Check how many pages are invalid-password pages
# (author's note from an earlier run: it was not many -- like 7/100).
# The encoding is explicit so the scraped text is decoded the same way on
# every platform instead of depending on the locale default.
with open(RESULTS_PATH, "r", encoding="utf-8") as f:
    results = json.load(f)

counter = 0  # pages that do NOT contain the invalid-password marker
total = len(results)
for result in results:
    if INVALID_PASSWORD_MARKER not in result['content']:
        # Dump every good page so its content can be inspected by eye.
        print("\n\n\n FOUND: \n", result['content'])
        counter += 1

print(f"\n\n\n FINAL GOOD: {counter} OF {total} RESULTS")