tinyweb/db.py
Derick Phan f609f867ef
Migrate TinyWeb to Reticulum mesh network
Replace HTTP server with Reticulum-native architecture. The server
now speaks only Reticulum, with a client-side gateway providing
browser access by translating HTTP to/from RNS requests.

- Extract db layer (db.py), templates (templates.py), handlers (handlers.py)
- app.py is now the RNS server with persistent identity and destination
- gateway.py bridges HTTP on localhost:8080 to RNS link requests
- Add rns dependency, add .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-25 22:18:24 -07:00

149 lines
4.7 KiB
Python

import sqlite3
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
# SQLite database file opened by get_db() and init_db().
DATABASE = "index.db"

# URL suffixes that fetch_page() refuses to record as links; they are
# compared against the lower-cased link URL.
SKIP_EXT = (
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".pdf",
    ".zip",
    ".mp3",
    ".mp4",
    ".css",
    ".js",
    ".ico",
    ".xml",
    ".json",
)
def get_db():
    """Open a new connection to DATABASE that yields ``sqlite3.Row`` rows.

    Each call creates a fresh connection; the caller is responsible for
    closing it.
    """
    conn = sqlite3.connect(DATABASE)
    # sqlite3.Row lets callers read columns by name (row["value"]) as well
    # as by position.
    conn.row_factory = sqlite3.Row
    return conn
def init_db():
    """Create the database schema in DATABASE if it does not exist yet.

    Creates the ``pages``, ``links``, ``settings`` and ``subscriptions``
    tables, the ``pages_fts`` FTS5 index whose content lives in ``pages``,
    and the triggers that mirror every insert, update and delete on
    ``pages`` into that index. Every statement uses ``IF NOT EXISTS``, so
    the function can be called on an already-initialised database.

    Raises:
        sqlite3.Error: if any statement fails. The connection is closed
            before the error propagates.
    """
    db = sqlite3.connect(DATABASE)
    try:
        db.execute(
            "CREATE TABLE IF NOT EXISTS pages ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  url TEXT UNIQUE NOT NULL,"
            "  title TEXT,"
            "  body TEXT,"
            "  note TEXT DEFAULT ''"
            ")"
        )
        # External-content FTS5 table: the text is stored in `pages`, and
        # the index is kept in sync by the triggers created below.
        db.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts "
            "USING fts5(title, body, url, note, content=pages, content_rowid=id)"
        )
        # NOTE(review): SQLite only enforces FOREIGN KEY / ON DELETE CASCADE
        # on connections that have run `PRAGMA foreign_keys = ON`; no code
        # visible in this section does so -- confirm whether the cascade is
        # expected to fire.
        db.execute(
            "CREATE TABLE IF NOT EXISTS links ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  page_id INTEGER NOT NULL,"
            "  url TEXT NOT NULL,"
            "  label TEXT,"
            "  FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE"
            ")"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS settings ("
            "  key TEXT PRIMARY KEY,"
            "  value TEXT"
            ")"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS subscriptions ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  url TEXT UNIQUE NOT NULL,"
            "  name TEXT DEFAULT '',"
            "  auto_sync INTEGER DEFAULT 0,"
            "  last_sync TEXT DEFAULT ''"
            ")"
        )
        # pages_ai / pages_ad / pages_au: after insert / delete / update.
        # Removing an entry from an external-content FTS5 index is done by
        # inserting the special 'delete' command together with the OLD
        # column values; an update is a delete followed by a fresh insert.
        db.executescript("""
            CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
        """)
        db.commit()
    finally:
        # Release the connection even when a statement above raised.
        db.close()
def get_setting(key, default=""):
    """Return the value stored for *key* in the ``settings`` table.

    Args:
        key: Setting name to look up.
        default: Value returned when no row exists for *key*.

    Returns:
        The stored value (which may be ``None`` if the column is NULL), or
        *default* when the key is absent.

    Raises:
        sqlite3.Error: if the query fails. The connection is closed before
            the error propagates.
    """
    db = get_db()
    try:
        row = db.execute(
            "SELECT value FROM settings WHERE key = ?", (key,)
        ).fetchone()
    finally:
        # Close on the error path too, so a failed query does not leak
        # the connection.
        db.close()
    return row["value"] if row else default
def set_setting(key, value):
    """Insert or overwrite the ``settings`` row for *key* and commit.

    Args:
        key: Setting name (primary key of the ``settings`` table).
        value: Value to store; replaces any existing value for *key*.

    Raises:
        sqlite3.Error: if the statement or commit fails. The connection is
            closed before the error propagates.
    """
    db = get_db()
    try:
        db.execute(
            "INSERT INTO settings (key, value) VALUES (?, ?) "
            "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
            (key, value),
        )
        db.commit()
    finally:
        # Close on the error path too, so a failed write does not leak
        # the connection.
        db.close()
def get_site_name():
    """Return the ``site_name`` setting, falling back to ``"tinyweb"``."""
    return get_setting("site_name", default="tinyweb")
def fetch_page(url):
    """Download *url* and extract its title, visible text and same-host links.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A tuple ``(title, body, links)`` where

        * ``title`` is the stripped ``<title>`` text, or *url* when the
          page has no title or the title is blank;
        * ``body`` is the page text with ``script``, ``style``, ``nav``,
          ``footer`` and ``header`` elements removed, joined by spaces;
        * ``links`` is a list of ``(href, label)`` pairs in document order,
          without duplicates, restricted to the same host as *url*; labels
          are truncated to 200 characters.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    # NOTE(review): verify=False disables TLS certificate verification.
    # Left unchanged because it may be deliberate (e.g. self-signed hosts)
    # -- confirm, since it allows man-in-the-middle tampering.
    resp = requests.get(
        url,
        timeout=10,
        headers={"User-Agent": "TinyWeb/1.0"},
        verify=False,
    )
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Extract links before stripping tags: nav/header/footer links would
    # otherwise be lost.
    domain = urlparse(url).netloc
    # Hrefs are compared without their fragment, so the page's own URL must
    # be normalised the same way for the self-link check to work.
    page_url = url.split("#")[0]
    # Wiki-style namespace paths that are never recorded as links.
    skip_markers = (
        "/special:",
        "/talk:",
        "/user:",
        "/wikipedia:",
        "/help:",
        "/portal:",
        "/file:",
        "/category:",
    )
    seen = set()
    links = []
    for a in soup.find_all("a", href=True):
        href = urljoin(url, a["href"]).split("#")[0]
        parsed = urlparse(href)
        if parsed.netloc != domain:
            continue
        # str.endswith accepts a tuple of suffixes.
        if href.lower().endswith(SKIP_EXT):
            continue
        if parsed.query or "action=" in href:
            continue
        path = parsed.path.lower()
        if any(marker in path for marker in skip_markers):
            continue
        if href in seen or href == page_url:
            continue
        seen.add(href)
        label = a.get_text(strip=True) or href
        links.append((href, label[:200]))

    for tag in soup(["script", "style", "nav", "footer", "header"]):
        tag.decompose()

    # Fall back to the URL when <title> is missing, has nested markup
    # (.string is None) or contains only whitespace.
    raw_title = soup.title.string if soup.title else None
    title = (raw_title or "").strip() or url
    body = soup.get_text(separator=" ", strip=True)
    return title, body, links
def index_url(url, note=""):
    """Fetch *url* and store (or refresh) its page row and outgoing links.

    A new ``pages`` row is inserted, or the existing row for *url* has its
    title, body and note overwritten. The page's rows in ``links`` are then
    replaced by the links found on the freshly fetched page.

    Args:
        url: Absolute URL of the page to index.
        note: Free-text note stored alongside the page.

    Returns:
        The title extracted by ``fetch_page``.

    Raises:
        Whatever ``fetch_page`` raises, and ``sqlite3.Error`` if a database
        statement fails. The connection is closed before the error
        propagates and nothing is committed.
    """
    title, body, links = fetch_page(url)
    db = get_db()
    try:
        db.execute(
            "INSERT INTO pages (url, title, body, note) VALUES (?, ?, ?, ?) "
            "ON CONFLICT(url) DO UPDATE SET title=excluded.title, "
            "body=excluded.body, note=excluded.note",
            (url, title, body, note),
        )
        # Look the id up by URL instead of trusting cursor.lastrowid: when
        # the upsert takes the DO UPDATE path no row is inserted, so
        # lastrowid does not identify the existing page and the link
        # refresh below would target the wrong page_id.
        page_id = db.execute(
            "SELECT id FROM pages WHERE url = ?", (url,)
        ).fetchone()[0]
        db.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
        db.executemany(
            "INSERT INTO links (page_id, url, label) VALUES (?, ?, ?)",
            [(page_id, href, label) for href, label in links],
        )
        db.commit()
    finally:
        # Close on the error path too, so a failed write does not leak
        # the connection.
        db.close()
    return title