"""tinyweb: a tiny personal search engine over pages you choose to index.

Pages are fetched with ``requests``, reduced to text with BeautifulSoup and
stored in SQLite, with an FTS5 table for full-text search.  Same-domain links
found on indexed pages are kept in a ``links`` table and offered as "trusted
sites" suggestions next to the search results.

NOTE(review): the HTML markup in the templates below was reconstructed.  Form
field names, routes and CSS hooks follow what the handlers read and what the
customize page advertises; the exact tags and attributes should be confirmed
against the intended design.
"""

import html
import json
import sqlite3
from contextlib import closing
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup

DATABASE = "index.db"
HOST = "localhost"
PORT = 5001

# Link targets that are not worth offering as "trusted" suggestions.
SKIP_EXT = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf", ".zip", ".mp3",
            ".mp4", ".css", ".js", ".ico", ".xml", ".json")

# Path fragments (wiki-style namespaces) whose links are skipped.
SKIP_PATH_PARTS = ("/special:", "/talk:", "/user:", "/wikipedia:", "/help:",
                   "/portal:", "/file:", "/category:")

SEARCH_SQL = (
    "SELECT p.id, p.url, p.title, p.body, p.note "
    "FROM pages_fts f JOIN pages p ON f.rowid = p.id "
    "WHERE pages_fts MATCH ? ORDER BY rank LIMIT 50"
)


def get_db():
    """Open a connection to the index database.

    Rows are returned as ``sqlite3.Row`` so columns can be read by name.
    Foreign keys are switched on because the ``links`` table declares
    ``ON DELETE CASCADE``, which SQLite ignores unless the pragma is set on
    the connection.  The caller is responsible for closing the connection.
    """
    db = sqlite3.connect(DATABASE)
    db.row_factory = sqlite3.Row
    db.execute("PRAGMA foreign_keys = ON")
    return db


def init_db():
    """Create the tables, the FTS5 index and its sync triggers if missing."""
    with closing(sqlite3.connect(DATABASE)) as db:
        db.execute(
            "CREATE TABLE IF NOT EXISTS pages ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  url TEXT UNIQUE NOT NULL,"
            "  title TEXT,"
            "  body TEXT,"
            "  note TEXT DEFAULT ''"
            ")"
        )
        db.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts "
            "USING fts5(title, body, url, note, content=pages, content_rowid=id)"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS links ("
            "  id INTEGER PRIMARY KEY AUTOINCREMENT,"
            "  page_id INTEGER NOT NULL,"
            "  url TEXT NOT NULL,"
            "  label TEXT,"
            "  FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE"
            ")"
        )
        db.execute(
            "CREATE TABLE IF NOT EXISTS settings ("
            "  key TEXT PRIMARY KEY,"
            "  value TEXT"
            ")"
        )
        # The FTS table uses external content, so these triggers keep it in
        # step with every insert, delete and update on ``pages``.
        db.executescript("""
            CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
            END;
            CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
                INSERT INTO pages_fts(pages_fts, rowid, title, body, url, note)
                VALUES ('delete', old.id, old.title, old.body, old.url, old.note);
                INSERT INTO pages_fts(rowid, title, body, url, note)
                VALUES (new.id, new.title, new.body, new.url, new.note);
            END;
        """)
        # Remove link rows that point at no page.  Such rows are never shown
        # (the suggestions query joins on pages) and only waste space.
        db.execute("DELETE FROM links WHERE page_id NOT IN (SELECT id FROM pages)")
        db.commit()


def fetch_page(url):
    """Download *url* and return ``(title, body_text, links)``.

    ``links`` is a list of ``(href, label)`` tuples for same-domain links,
    de-duplicated, with fragments removed and with assets, query-string URLs
    and wiki namespace pages skipped.  ``body_text`` is the page text with
    script/style/nav/footer/header elements removed.

    Raises whatever ``requests`` raises for network errors, plus
    ``requests.HTTPError`` for non-2xx responses.
    """
    # TLS certificates are verified (the requests default).
    resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"})
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Extract links before stripping tags, since nav/header links are
    # removed from the tree below.
    domain = urlparse(url).netloc
    seen = set()
    links = []
    for anchor in soup.find_all("a", href=True):
        try:
            href = urljoin(url, anchor["href"]).split("#")[0]
            parsed = urlparse(href)
        except ValueError:
            # A single malformed href must not abort indexing of the page.
            continue
        if parsed.netloc != domain:
            continue
        if href.lower().endswith(SKIP_EXT):
            continue
        if parsed.query or "action=" in href:
            continue
        path = parsed.path.lower()
        if any(part in path for part in SKIP_PATH_PARTS):
            continue
        if href in seen or href == url:
            continue
        seen.add(href)
        label = anchor.get_text(strip=True) or href
        links.append((href, label[:200]))

    for tag in soup(["script", "style", "nav", "footer", "header"]):
        tag.decompose()

    title = url
    if soup.title and soup.title.string:
        # A whitespace-only <title> falls back to the URL as well.
        title = soup.title.string.strip() or url
    body = soup.get_text(separator=" ", strip=True)
    return title, body, links


def save_page(url, title, body, links, note=None):
    """Insert or refresh the page at *url* and replace its stored links.

    When *note* is ``None`` an existing note is left untouched (new rows get
    an empty note); otherwise the note is overwritten.  Returns the page id.

    The id is looked up by URL after the upsert.  ``cursor.lastrowid`` is not
    used because it does not identify the existing row when the statement
    takes the ``DO UPDATE`` path.
    """
    with closing(get_db()) as db:
        if note is None:
            db.execute(
                "INSERT INTO pages (url, title, body, note) VALUES (?, ?, ?, '') "
                "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body",
                (url, title, body),
            )
        else:
            db.execute(
                "INSERT INTO pages (url, title, body, note) VALUES (?, ?, ?, ?) "
                "ON CONFLICT(url) DO UPDATE SET title=excluded.title, "
                "body=excluded.body, note=excluded.note",
                (url, title, body, note),
            )
        page_id = db.execute(
            "SELECT id FROM pages WHERE url = ?", (url,)
        ).fetchone()["id"]
        db.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
        db.executemany(
            "INSERT INTO links (page_id, url, label) VALUES (?, ?, ?)",
            [(page_id, href, label) for href, label in links],
        )
        db.commit()
    return page_id


def search_pages(db, query):
    """Run *query* against the FTS5 index and return the matching rows.

    The query is first passed through unchanged so FTS5 operators keep
    working.  If SQLite rejects it as invalid FTS5 syntax, it is retried with
    every whitespace-separated term quoted as a literal string.  Raises
    ``sqlite3.OperationalError`` if the retry fails too.
    """
    try:
        return db.execute(SEARCH_SQL, (query,)).fetchall()
    except sqlite3.OperationalError:
        quoted = " ".join(
            '"' + term.replace('"', '""') + '"' for term in query.split()
        )
        return db.execute(SEARCH_SQL, (quoted,)).fetchall()


def find_trusted_links(db, query, exclude_urls, limit=20):
    """Return up to *limit* stored links whose label contains a query word.

    Links whose URL is in *exclude_urls* (already shown as results) are
    skipped, and each URL is returned at most once.
    """
    words = query.lower().split()
    seen = set(exclude_urls)
    trusted = []
    cursor = db.execute(
        "SELECT l.url, l.label, p.title AS source_title "
        "FROM links l JOIN pages p ON l.page_id = p.id"
    )
    for link in cursor:
        if link["url"] in seen:
            continue
        label = (link["label"] or "").lower()  # the label column is nullable
        if any(word in label for word in words):
            seen.add(link["url"])
            trusted.append(link)
            if len(trusted) >= limit:
                break
    return trusted


def snippet(text, query, ctx=80):
    """Return an excerpt of *text* around the first match for *query*.

    The whole query is tried first, then each of its words.  The excerpt
    keeps *ctx* characters either side of the match and is marked with
    ``...`` where it was cut.  Without any match the first 200 characters
    are returned.
    """
    text = text or ""
    lowered = text.lower()
    for needle in [query, *query.split()]:
        pos = lowered.find(needle.lower())
        if pos == -1:
            continue
        start = max(0, pos - ctx)
        end = min(len(text), pos + len(needle) + ctx)
        prefix = "..." if start > 0 else ""
        suffix = "..." if end < len(text) else ""
        return prefix + text[start:end] + suffix
    return text[:200]


def esc(s):
    """HTML-escape the string form of *s* (quotes included)."""
    return html.escape(str(s))


def get_setting(key, default=""):
    """Return the stored value for *key*, or *default* if it is not set."""
    with closing(get_db()) as db:
        row = db.execute(
            "SELECT value FROM settings WHERE key = ?", (key,)
        ).fetchone()
    return row["value"] if row else default


def set_setting(key, value):
    """Store *value* under *key*, replacing any previous value."""
    with closing(get_db()) as db:
        db.execute(
            "INSERT INTO settings (key, value) VALUES (?, ?) "
            "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
            (key, value),
        )
        db.commit()


def get_site_name():
    """Return the user-chosen site name (default ``tinyweb``)."""
    return get_setting("site_name", "tinyweb")


def wrap_page(body_html):
    """Wrap *body_html* in a full HTML document, adding the custom CSS."""
    css = get_setting("custom_css")
    # "<\/" means the same as "</" inside CSS, but stops saved CSS from
    # closing the <style> element and injecting markup into the page.
    style = "<style>" + css.replace("</", "<\\/") + "</style>" if css else ""
    return (
        '<!DOCTYPE html><html><head><meta charset="utf-8">'
        f"{style}</head><body>{body_html}</body></html>"
    )


class Handler(BaseHTTPRequestHandler):
    """HTTP front end: search, add, browse, edit, customize, import/export."""

    # ---- response helpers -------------------------------------------------

    def respond(self, body, status=200):
        """Send *body* (an HTML fragment) wrapped in the page template."""
        payload = wrap_page(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _respond_text(self, text):
        """Send a plain-text reply readable cross-origin (for the bookmarklet)."""
        payload = text.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _redirect(self, location):
        """Send a 302 redirect to *location*."""
        self.send_response(302)
        self.send_header("Location", location)
        self.end_headers()

    @staticmethod
    def _page_id(path):
        """Return the integer id at the end of *path*, or None if not numeric."""
        try:
            return int(path.split("/")[-1])
        except ValueError:
            return None

    # ---- routing ----------------------------------------------------------

    def do_GET(self):
        """Dispatch GET requests by path."""
        parsed = urlparse(self.path)
        path = parsed.path
        params = parse_qs(parsed.query)
        if path == "/":
            self.handle_search(params)
        elif path == "/add":
            self.handle_add_form()
        elif path == "/pages":
            self.handle_pages()
        elif path.startswith("/delete/"):
            self.handle_delete(path)
        elif path.startswith("/edit/"):
            self.handle_edit_form(path)
        elif path == "/style":
            self.handle_style_form()
        elif path == "/bookmark":
            self.handle_bookmark(params)
        elif path == "/export":
            self.handle_export()
        elif path == "/import":
            self.handle_import_form()
        else:
            self.respond("<h1>404</h1>", 404)

    def do_POST(self):
        """Parse the url-encoded form body and dispatch POST requests by path."""
        try:
            length = max(int(self.headers.get("Content-Length") or 0), 0)
        except ValueError:
            return self.respond("<h1>400</h1>", 400)
        body = self.rfile.read(length).decode("utf-8", errors="replace")
        params = parse_qs(body)
        path = urlparse(self.path).path
        if path == "/add":
            self.handle_add_submit(params)
        elif path.startswith("/edit/"):
            self.handle_edit_submit(path, params)
        elif path == "/style":
            self.handle_style_submit(params)
        elif path == "/import":
            self.handle_import_submit(params)
        else:
            self.respond("<h1>404</h1>", 404)

    # ---- search -----------------------------------------------------------

    def handle_search(self, params):
        """Render the home page, with results when a ``q`` parameter is given."""
        q = params.get("q", [""])[0].strip()
        name = get_site_name()
        result_html = ""
        trusted_html = ""
        with closing(get_db()) as db:
            count = db.execute("SELECT count(*) FROM pages").fetchone()[0]
            if q:
                try:
                    rows = search_pages(db, q)
                except sqlite3.OperationalError as exc:
                    rows = []
                    result_html = f"<p>Search failed: {esc(exc)}</p>"
                else:
                    result_html = self._render_results(rows, q)
                # Search all linked pages from trusted (already indexed) sites.
                trusted = find_trusted_links(db, q, {r["url"] for r in rows})
                trusted_html = self._render_trusted(trusted)
        self.respond(
            f"<h1>{esc(name)}</h1>"
            '<form method="get" action="/">'
            f'<input type="text" name="q" value="{esc(q)}" autofocus>'
            ' <input type="submit" value="search">'
            "</form>"
            f"<p>{count} page(s) indexed."
            ' <a href="/add">+ add url</a>'
            ' | <a href="/pages">browse</a>'
            ' | <a href="/style">customize</a></p>'
            f"<div>{result_html}{trusted_html}</div>"
        )

    @staticmethod
    def _render_results(rows, q):
        """Return the HTML for the search results (or the "no results" note)."""
        if not rows:
            return "<p>No results in your index.</p>"
        parts = []
        for row in rows:
            url = esc(row["url"])
            note_html = ""
            if row["note"]:
                note_html = f'<div class="note">{esc(row["note"])}</div>'
            parts.append(
                '<div class="result">'
                f'<a href="{url}">{esc(row["title"])}</a><br>'
                f"<small>{url}</small><br>"
                f'{esc(snippet(row["body"], q))}'
                f"{note_html}"
                "</div>"
            )
        return "".join(parts)

    @staticmethod
    def _render_trusted(trusted):
        """Return the collapsible "trusted sites" list, or "" when empty."""
        if not trusted:
            return ""
        items = "".join(
            f'<li><a href="{esc(link["url"])}">{esc(link["label"])}</a> '
            f'<small>— from {esc(link["source_title"])}</small></li>'
            for link in trusted
        )
        return (
            '<details class="trusted">'
            f"<summary>from your trusted sites ({len(trusted)})</summary>"
            f"<ul>{items}</ul>"
            "</details>"
        )

    # ---- add --------------------------------------------------------------

    def handle_add_form(self, msg=""):
        """Render the "add url" form; *msg* is inserted as pre-escaped HTML."""
        self.respond(
            "<h1>add url</h1>"
            '<form method="post" action="/add">'
            '<p><input type="text" name="url" placeholder="https://..." size="60"></p>'
            '<p><input type="text" name="note" placeholder="note (optional)" size="60"></p>'
            '<input type="submit" value="add">'
            "</form>"
            f"<p>{msg}</p>"
            '<a href="/">back</a>'
        )

    def handle_add_submit(self, params):
        """Fetch and index the submitted URL, then re-render the form."""
        url = params.get("url", [""])[0].strip()
        note = params.get("note", [""])[0].strip()
        if not url:
            return self.handle_add_form("URL is required.")
        if not url.startswith(("http://", "https://")):
            return self.handle_add_form("URL must start with http:// or https://")
        try:
            title, body, links = fetch_page(url)
            save_page(url, title, body, links, note)
        except Exception as e:  # request boundary: report, do not crash
            return self.handle_add_form(f"Error: {esc(str(e))}")
        self.handle_add_form(f"Indexed: {esc(title)}")

    # ---- browse / edit / delete -------------------------------------------

    def handle_pages(self):
        """List every indexed page with edit and remove links."""
        with closing(get_db()) as db:
            rows = db.execute(
                "SELECT id, url, title, note FROM pages ORDER BY id DESC"
            ).fetchall()
        items = []
        for row in rows:
            url = esc(row["url"])
            note_html = ""
            if row["note"]:
                note_html = f' — <span class="note">{esc(row["note"])}</span>'
            items.append(
                f'<li><a href="{url}">{esc(row["title"])}</a>{note_html} '
                f"<small>({url})</small> "
                f'<a href="/edit/{row["id"]}">edit</a> '
                f'<a href="/delete/{row["id"]}">remove</a></li>'
            )
        self.respond(
            f"<h1>indexed pages ({len(rows)})</h1>"
            f"<ul>{''.join(items)}</ul>"
            '<p><a href="/export">export</a> | <a href="/import">import</a></p>'
            '<a href="/">back</a>'
        )

    def handle_edit_form(self, path, msg=""):
        """Render the note editor for the page id at the end of *path*."""
        page_id = self._page_id(path)
        if page_id is None:
            return self.respond("<h1>400</h1>", 400)
        with closing(get_db()) as db:
            row = db.execute(
                "SELECT id, url, title, note FROM pages WHERE id = ?", (page_id,)
            ).fetchone()
        if not row:
            return self.respond("<h1>404</h1>", 404)
        self.respond(
            "<h1>edit note</h1>"
            f"<p>{esc(row['title'])}<br>"
            f"<small>{esc(row['url'])}</small></p>"
            f'<form method="post" action="/edit/{page_id}">'
            f'<p><input type="text" name="note" value="{esc(row["note"] or "")}" size="60"></p>'
            '<input type="submit" value="save">'
            "</form>"
            f"<p>{msg}</p>"
            '<a href="/pages">back</a>'
        )

    def handle_edit_submit(self, path, params):
        """Save the submitted note and redirect to the page list."""
        page_id = self._page_id(path)
        if page_id is None:
            return self.respond("<h1>400</h1>", 400)
        note = params.get("note", [""])[0].strip()
        with closing(get_db()) as db:
            db.execute("UPDATE pages SET note = ? WHERE id = ?", (note, page_id))
            db.commit()
        self._redirect("/pages")

    def handle_delete(self, path):
        """Delete a page and its links, then redirect to the page list."""
        page_id = self._page_id(path)
        if page_id is None:
            return self.respond("<h1>400</h1>", 400)
        with closing(get_db()) as db:
            # Deleted explicitly so the result does not depend on the cascade.
            db.execute("DELETE FROM links WHERE page_id = ?", (page_id,))
            db.execute("DELETE FROM pages WHERE id = ?", (page_id,))
            db.commit()
        self._redirect("/pages")

    # ---- bookmarklet ------------------------------------------------------

    def handle_bookmark(self, params):
        """Index the ``url`` parameter and answer in plain text.

        The status is always 200 with an ``ok:`` or ``error:`` prefix in the
        body.  An existing note on the page is preserved.
        """
        url = params.get("url", [""])[0].strip()
        if not url or not url.startswith(("http://", "https://")):
            return self._respond_text("error: invalid url")
        try:
            title, body, links = fetch_page(url)
            save_page(url, title, body, links)
            msg = f"ok: {title}"
        except Exception as e:  # request boundary: report, do not crash
            msg = f"error: {e}"
        self._respond_text(msg)

    # ---- export / import --------------------------------------------------

    def handle_export(self):
        """Send url, title and note of every page as a JSON download."""
        with closing(get_db()) as db:
            rows = db.execute(
                "SELECT url, title, note FROM pages ORDER BY id"
            ).fetchall()
        data = [
            {"url": r["url"], "title": r["title"], "note": r["note"]}
            for r in rows
        ]
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header(
            "Content-Disposition", "attachment; filename=tinyweb-export.json"
        )
        self.end_headers()
        self.wfile.write(json.dumps(data, indent=2).encode())

    def handle_import_form(self, msg=""):
        """Render the import form; *msg* is inserted as pre-escaped HTML."""
        self.respond(
            "<h1>import</h1>"
            "<p>Paste the contents of a tinyweb export file (JSON).</p>"
            '<form method="post" action="/import">'
            '<p><textarea name="data" rows="12" cols="60"></textarea></p>'
            '<input type="submit" value="import">'
            "</form>"
            f"<p>{msg}</p>"
            '<a href="/pages">back</a>'
        )

    def handle_import_submit(self, params):
        """Re-fetch and index every URL listed in the pasted export JSON.

        Entries without a URL are skipped silently.  Entries that are not
        objects, have a non-http(s) URL, or fail to fetch or store are
        counted as errors; the rest of the import continues.
        """
        raw = params.get("data", [""])[0].strip()
        if not raw:
            return self.handle_import_form("Paste JSON data.")
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            return self.handle_import_form("Invalid JSON.")
        if not isinstance(data, list):
            return self.handle_import_form("Expected a JSON array.")
        imported = 0
        errors = 0
        for entry in data:
            if not isinstance(entry, dict):
                errors += 1
                continue
            # ``or ""`` also covers explicit JSON nulls.
            url = str(entry.get("url") or "").strip()
            note = str(entry.get("note") or "").strip()
            if not url:
                continue
            if not url.startswith(("http://", "https://")):
                errors += 1
                continue
            try:
                title, body, links = fetch_page(url)
                save_page(url, title, body, links, note)
            except Exception as exc:  # best effort: one bad URL must not stop the rest
                self.log_error("import of %s failed: %s", url, exc)
                errors += 1
            else:
                imported += 1
        self.handle_import_form(f"Imported {imported} page(s). {errors} error(s).")

    # ---- customize --------------------------------------------------------

    def handle_style_form(self, msg=""):
        """Render the customize page: site name, custom CSS and bookmarklet."""
        css = get_setting("custom_css")
        name = get_site_name()
        bookmarklet = (
            f"javascript:void(fetch('http://{HOST}:{PORT}/bookmark?url='"
            "+encodeURIComponent(location.href))"
            ".then(r=>r.text()).then(t=>alert(t))"
            ".catch(e=>alert('error: '+e)))"
        )
        self.respond(
            "<h1>customize</h1>"
            "<h2>name your search engine</h2>"
            '<form method="post" action="/style">'
            f'<p><input type="text" name="site_name" value="{esc(name)}"></p>'
            "<h2>custom css</h2>"
            "<p>Some classes you can target:</p>"
            "<pre>"
            "body          - page background, font\n"
            "h1            - page titles\n"
            "input, button - search bar\n"
            "a             - links\n"
            ".result       - each search result\n"
            ".note         - your notes on results\n"
            ".trusted      - trusted sites dropdown\n"
            "small         - url text\n"
            "ul, li        - browse page list"
            "</pre>"
            f'<p><textarea name="css" rows="12" cols="60">{esc(css)}</textarea></p>'
            '<input type="submit" value="save">'
            "</form>"
            "<h2>bookmarklet</h2>"
            "<p>Drag this link to your bookmarks bar. "
            "Click it on any page to index it instantly.</p>"
            f'<p><a href="{esc(bookmarklet)}">+ save to {esc(name)}</a></p>'
            f"<p>{msg}</p>"
            '<a href="/">back</a>'
        )

    def handle_style_submit(self, params):
        """Save the site name and custom CSS, then re-render the form."""
        css = params.get("css", [""])[0]
        name = params.get("site_name", ["tinyweb"])[0].strip()
        set_setting("custom_css", css)
        set_setting("site_name", name or "tinyweb")
        self.handle_style_form("Saved.")


if __name__ == "__main__":
    init_db()
    print("running on http://localhost:5001")
    HTTPServer((HOST, PORT), Handler).serve_forever()