Log raw content; add sources table if it does not exist

This commit is contained in:
jChenvan 2025-09-06 10:23:51 -04:00
parent 431d235e3b
commit 5700893b88
3 changed files with 15 additions and 2 deletions

View File

@@ -2,4 +2,5 @@
feeds.csv feeds.csv
feed_contents.xml feed_contents.xml
page_content.json page_content.json
logs.json
__pycache__/ __pycache__/

View File

@@ -1,6 +1,7 @@
import asyncio import asyncio
import csv import csv
from dataclasses import dataclass from dataclasses import dataclass
import json
import os import os
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
import feedparser import feedparser
@@ -135,7 +136,6 @@ async def fetch_site(url: str) -> str | None:
main_text = clean_string(main_text) main_text = clean_string(main_text)
print(f"SUCCESSFUL FETCH: {url}") print(f"SUCCESSFUL FETCH: {url}")
print(f"FETCH CONTENT: {main_text[:140]}...")
# .get_text() with separator and strip for cleaner output # .get_text() with separator and strip for cleaner output
return main_text return main_text
else: else:
@@ -145,7 +145,6 @@ async def fetch_site(url: str) -> str | None:
body_text = soup.body.get_text(separator='\n', strip=True) body_text = soup.body.get_text(separator='\n', strip=True)
body_text = clean_string(body_text) body_text = clean_string(body_text)
print(f"SUCCESSFUL FETCH: {url}") print(f"SUCCESSFUL FETCH: {url}")
print(f"FETCH CONTENT: {body_text[:140]}...")
return body_text return body_text
except Exception as e: except Exception as e:
@@ -192,4 +191,9 @@ async def get_all_feed_contents() -> List[Dict[str, str]]:
}) })
print(f"\nSuccessfully fetched {len(pages)} webpages.") print(f"\nSuccessfully fetched {len(pages)} webpages.")
with open("logs.json", "w") as f:
json.dump({
"urls":urls,
"results": results
}, f, indent = 4)
return pages return pages

View File

@@ -979,6 +979,14 @@ def view_sources():
try: try:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute('''
CREATE TABLE IF NOT EXISTS sources (
src_id SERIAL PRIMARY KEY,
title VARCHAR(255) NOT NULL,
link VARCHAR(255) NOT NULL,
type VARCHAR(255) NOT NULL
)
''')
cur.execute('SELECT * FROM sources ORDER BY src_id DESC') cur.execute('SELECT * FROM sources ORDER BY src_id DESC')
sources = cur.fetchall() sources = cur.fetchall()
except Exception as e: except Exception as e: