Log raw content; add sources table if it does not exist
This commit is contained in:
parent
431d235e3b
commit
5700893b88
|
@ -2,4 +2,5 @@
|
|||
feeds.csv
|
||||
feed_contents.xml
|
||||
page_content.json
|
||||
logs.json
|
||||
__pycache__/
|
|
@ -1,6 +1,7 @@
|
|||
import asyncio
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Tuple
|
||||
import feedparser
|
||||
|
@ -135,7 +136,6 @@ async def fetch_site(url: str) -> str | None:
|
|||
main_text = clean_string(main_text)
|
||||
|
||||
print(f"SUCCESSFUL FETCH: {url}")
|
||||
print(f"FETCH CONTENT: {main_text[:140]}...")
|
||||
# .get_text() with separator and strip for cleaner output
|
||||
return main_text
|
||||
else:
|
||||
|
@ -145,7 +145,6 @@ async def fetch_site(url: str) -> str | None:
|
|||
body_text = soup.body.get_text(separator='\n', strip=True)
|
||||
body_text = clean_string(body_text)
|
||||
print(f"SUCCESSFUL FETCH: {url}")
|
||||
print(f"FETCH CONTENT: {body_text[:140]}...")
|
||||
return body_text
|
||||
|
||||
except Exception as e:
|
||||
|
@ -192,4 +191,9 @@ async def get_all_feed_contents() -> List[Dict[str, str]]:
|
|||
})
|
||||
|
||||
print(f"\nSuccessfully fetched {len(pages)} webpages.")
|
||||
with open("logs.json", "w") as f:
|
||||
json.dump({
|
||||
"urls":urls,
|
||||
"results": results
|
||||
}, f, indent = 4)
|
||||
return pages
|
|
@ -979,6 +979,14 @@ def view_sources():
|
|||
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('''
|
||||
CREATE TABLE IF NOT EXISTS sources (
|
||||
src_id SERIAL PRIMARY KEY,
|
||||
title VARCHAR(255) NOT NULL,
|
||||
link VARCHAR(255) NOT NULL,
|
||||
type VARCHAR(255) NOT NULL
|
||||
)
|
||||
''')
|
||||
cur.execute('SELECT * FROM sources ORDER BY src_id DESC')
|
||||
sources = cur.fetchall()
|
||||
except Exception as e:
|
||||
|
|
Loading…
Reference in New Issue