Replace HTTP server with Reticulum-native architecture.

The server now speaks only Reticulum, with a client-side gateway providing browser access by translating HTTP to/from RNS requests.

- Extract db layer (db.py), templates (templates.py), handlers (handlers.py)
- app.py is now the RNS server with persistent identity and destination
- gateway.py bridges HTTP on localhost:8080 to RNS link requests
- Add rns dependency, add .gitignore
149 lines
4.7 KiB
Python
149 lines
4.7 KiB
Python
import sqlite3
|
|
import requests
|
|
from urllib.parse import urlparse, urljoin
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Path of the SQLite database file opened by the helpers in this module.
DATABASE = "index.db"

# URL suffixes treated as non-page resources: a link whose URL ends with one
# of these is left out of the link list extracted from a fetched page.
SKIP_EXT = (
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".pdf",
    ".zip",
    ".mp3",
    ".mp4",
    ".css",
    ".js",
    ".ico",
    ".xml",
    ".json",
)
|
|
|
|
|
|
def get_db():
    """Open and return a new connection to the ``DATABASE`` file.

    The connection's row factory is ``sqlite3.Row``, so fetched rows can be
    read by column name as well as by position.  Every call opens a fresh
    connection; the caller is expected to close it.
    """
    connection = sqlite3.connect(DATABASE)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
def init_db():
    """Create the database schema in ``DATABASE`` if it is not there yet.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is
    harmless.  The schema consists of:

    * ``pages`` - one row per indexed URL (``url`` is unique).
    * ``pages_fts`` - an FTS5 external-content index over ``pages``.
    * ``links`` - links found on a page, keyed by ``page_id``.
    * ``settings`` - key/value store.
    * ``subscriptions`` - subscribed URLs with sync bookkeeping columns.
    * ``pages_ai`` / ``pages_ad`` / ``pages_au`` - triggers that mirror every
      insert, delete and update on ``pages`` into ``pages_fts``.

    The connection is closed even when a statement fails, so a broken schema
    or an unavailable FTS5 module does not leak the handle.
    """
    db = sqlite3.connect(DATABASE)
    try:
        db.execute(
            "CREATE TABLE IF NOT EXISTS pages ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  url TEXT UNIQUE NOT NULL,"
            "  title TEXT,"
            "  body TEXT,"
            "  note TEXT DEFAULT ''"
            ")"
        )
        # External-content FTS table: the text lives in `pages`; the triggers
        # below are what keep this index up to date.
        db.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts "
            "USING fts5(title, body, url, note, content=pages, content_rowid=id)"
        )
        # NOTE(review): SQLite only enforces foreign keys (and therefore this
        # ON DELETE CASCADE) on connections that have run
        # `PRAGMA foreign_keys = ON`; nothing visible in this module does so.
        # Confirm whether the cascade is expected to fire.
        db.execute(
            "CREATE TABLE IF NOT EXISTS links ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  page_id INTEGER NOT NULL,"
            "  url TEXT NOT NULL,"
            "  label TEXT,"
            "  FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE"
            ")"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS settings ("
            "  key TEXT PRIMARY KEY,"
            "  value TEXT"
            ")"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS subscriptions ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  url TEXT UNIQUE NOT NULL,"
            "  name TEXT DEFAULT '',"
            "  auto_sync INTEGER DEFAULT 0,"
            "  last_sync TEXT DEFAULT ''"
            ")"
        )
        # For an external-content FTS5 table, removing an entry is done by
        # inserting a special 'delete' row carrying the old column values;
        # an update is a 'delete' of the old values followed by a re-insert.
        db.executescript("""
            CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
        """)
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def get_setting(key, default=""):
    """Return the stored value for ``key`` from the ``settings`` table.

    Args:
        key: Setting name to look up.
        default: Value returned when no row exists for ``key``.

    Returns:
        The ``value`` column of the matching row, or ``default`` when there
        is no such row.
    """
    db = get_db()
    try:
        row = db.execute(
            "SELECT value FROM settings WHERE key = ?", (key,)
        ).fetchone()
    finally:
        # Close even when the query raises (e.g. the table does not exist
        # yet) so a failed lookup does not leak the connection.
        db.close()
    return row["value"] if row else default
|
|
|
|
|
|
def set_setting(key, value):
    """Store ``value`` under ``key`` in the ``settings`` table.

    Inserts a new row, or overwrites the value of the existing row when
    ``key`` is already present, then commits.

    Args:
        key: Setting name.
        value: Value to store for that setting.
    """
    db = get_db()
    try:
        db.execute(
            "INSERT INTO settings (key, value) VALUES (?, ?) "
            "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
            (key, value),
        )
        db.commit()
    finally:
        # Close even when the write fails so the connection is not leaked;
        # closing without a commit discards the pending change.
        db.close()
|
|
|
|
|
|
def get_site_name():
    """Return the ``site_name`` setting, or ``"tinyweb"`` when it is not set."""
    site_name = get_setting("site_name", default="tinyweb")
    return site_name
|
|
|
|
|
|
# Lower-cased path fragments of wiki namespace pages whose links are skipped.
_SKIP_PATH_MARKERS = (
    "/special:",
    "/talk:",
    "/user:",
    "/wikipedia:",
    "/help:",
    "/portal:",
    "/file:",
    "/category:",
)


def _extract_links(soup, url):
    """Return unique ``(href, label)`` pairs for same-host links in ``soup``.

    A link is kept only when it resolves to the same network location as
    ``url``, does not end in one of ``SKIP_EXT``, has no query string and no
    ``action=`` marker, is not in one of the skipped wiki namespaces, and is
    neither a repeat nor a link back to the page itself.
    """
    domain = urlparse(url).netloc
    # Hrefs are compared with their fragment removed, so the page's own URL
    # must be normalised the same way for the self-link test to work.
    page_url = url.split("#")[0]
    skip_ext = tuple(SKIP_EXT)  # str.endswith accepts a tuple of suffixes
    seen = set()
    links = []
    for a in soup.find_all("a", href=True):
        try:
            href = urljoin(url, a["href"]).split("#")[0]
            parsed = urlparse(href)
        except ValueError:
            # A malformed href (e.g. a broken IPv6 literal) should not abort
            # the whole page; just ignore that one link.
            continue
        if parsed.netloc != domain:
            continue
        if href.lower().endswith(skip_ext):
            continue
        if parsed.query or "action=" in href:
            continue
        path = parsed.path.lower()
        if any(marker in path for marker in _SKIP_PATH_MARKERS):
            continue
        if href in seen or href == page_url:
            continue
        seen.add(href)
        label = a.get_text(strip=True) or href
        links.append((href, label[:200]))
    return links


def fetch_page(url):
    """Download ``url`` and return its title, visible text and page links.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``(title, body, links)`` tuple.  ``title`` is the stripped
        ``<title>`` text, or ``url`` when the page has no usable title.
        ``body`` is the page text with script, style, nav, footer and header
        elements removed.  ``links`` is a list of ``(href, label)`` pairs for
        same-host links, with labels truncated to 200 characters.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an error status.
    """
    # NOTE(review): verify=False turns off TLS certificate verification, so
    # the fetched content can be tampered with in transit.  Left unchanged
    # because it may be deliberate (self-signed hosts) - confirm.
    resp = requests.get(
        url,
        timeout=10,
        headers={"User-Agent": "TinyWeb/1.0"},
        verify=False,
    )
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Extract links before stripping tags: nav/header/footer links are wanted
    # in the link list even though their text is excluded from the body.
    links = _extract_links(soup, url)

    for tag in soup(["script", "style", "nav", "footer", "header"]):
        tag.decompose()

    # Fall back to the URL when <title> is missing, has nested markup
    # (``.string`` is None) or contains only whitespace.
    raw_title = soup.title.string if soup.title else None
    title = (raw_title or "").strip() or url
    body = soup.get_text(separator=" ", strip=True)
    return title, body, links
|
|
|
|
|
|
def index_url(url, note=""):
    """Fetch ``url`` and store (or refresh) it and its links in the index.

    The page row is inserted, or updated in place when ``url`` is already
    indexed, and the page's stored links are replaced by the freshly
    extracted ones.  Everything is committed together; if any statement
    fails nothing is committed and the connection is still closed.

    Args:
        url: Address of the page to index.
        note: Free-text note stored with the page.

    Returns:
        The title of the fetched page.
    """
    title, body, links = fetch_page(url)
    db = get_db()
    try:
        db.execute(
            "INSERT INTO pages (url, title, body, note) VALUES (?, ?, ?, ?) "
            "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, note=excluded.note",
            (url, title, body, note),
        )
        # Look the id up by URL instead of trusting cursor.lastrowid: when
        # the upsert takes the DO UPDATE path no row is inserted, so
        # lastrowid does not identify the existing page and the links would
        # be attached to the wrong page_id.
        page_id = db.execute(
            "SELECT id FROM pages WHERE url = ?", (url,)
        ).fetchone()[0]
        db.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
        db.executemany(
            "INSERT INTO links (page_id, url, label) VALUES (?, ?, ?)",
            [(page_id, href, label) for href, label in links],
        )
        db.commit()
    finally:
        db.close()
    return title
|