diff --git a/db.py b/db.py
index 5a86a1c..b523c79 100644
--- a/db.py
+++ b/db.py
@@ -1,12 +1,26 @@
import sqlite3
import requests
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse, urljoin, parse_qs, urlencode, urlunparse
from bs4 import BeautifulSoup
DATABASE = "index.db"
SKIP_EXT = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf", ".zip", ".mp3", ".mp4", ".css", ".js", ".ico", ".xml", ".json")
+# Query-string keys that only carry click/campaign attribution.
+# clean_url() lowercases each key before the lookup, so every entry
+# here must be lowercase.
+TRACKING_PARAMS = {
+    "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
+    "fbclid", "gclid", "msclkid", "mc_cid", "mc_eid", "ref", "ref_src",
+    "ref_url", "_ga", "_gl", "yclid", "twclid", "igshid",
+}
+
+
+def clean_url(url):
+    """Return *url* with known tracking query parameters removed.
+
+    Keys are matched case-insensitively against TRACKING_PARAMS. Every
+    other parameter is kept, including repeated keys and keys whose
+    value is empty. Only the query component is rewritten; it is
+    re-encoded with urlencode, so its escaping may be normalised.
+    """
+    parsed = urlparse(url)
+    # keep_blank_values=True: by default parse_qs discards "key=" pairs,
+    # which would strip legitimate empty-valued parameters from the URL.
+    params = parse_qs(parsed.query, keep_blank_values=True)
+    cleaned = {
+        key: values
+        for key, values in params.items()
+        if key.lower() not in TRACKING_PARAMS
+    }
+    # doseq=True expands each value list back into repeated key=value pairs.
+    new_query = urlencode(cleaned, doseq=True)
+    return urlunparse(parsed._replace(query=new_query))
+
def get_db():
db = sqlite3.connect(DATABASE)
@@ -60,6 +74,7 @@ def init_db():
" url TEXT NOT NULL,"
" title TEXT,"
" note TEXT DEFAULT '',"
+ " tags TEXT DEFAULT '',"
" FOREIGN KEY (subscription_id) REFERENCES subscriptions(id) ON DELETE CASCADE,"
" UNIQUE(subscription_id, url)"
")"
@@ -68,6 +83,21 @@ def init_db():
"CREATE VIRTUAL TABLE IF NOT EXISTS remote_pages_fts "
"USING fts5(title, url, note, content=remote_pages, content_rowid=id)"
)
+ db.execute(
+ "CREATE TABLE IF NOT EXISTS tags ("
+ " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+ " name TEXT UNIQUE NOT NULL"
+ ")"
+ )
+ db.execute(
+ "CREATE TABLE IF NOT EXISTS page_tags ("
+ " page_id INTEGER NOT NULL,"
+ " tag_id INTEGER NOT NULL,"
+ " PRIMARY KEY (page_id, tag_id),"
+ " FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,"
+ " FOREIGN KEY (tag_id) REFERENCES tags(id) ON DELETE CASCADE"
+ ")"
+ )
db.executescript("""
CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
INSERT INTO pages_fts(rowid, title, body, url, note)
@@ -104,6 +134,12 @@ def init_db():
db.execute("ALTER TABLE subscriptions RENAME COLUMN url TO dest_hash")
db.commit()
+ # Migrate remote_pages: add tags column if missing
+ rp_cols = [row[1] for row in db.execute("PRAGMA table_info(remote_pages)").fetchall()]
+ if "tags" not in rp_cols:
+ db.execute("ALTER TABLE remote_pages ADD COLUMN tags TEXT DEFAULT ''")
+ db.commit()
+
db.commit()
db.close()
@@ -165,6 +201,7 @@ def fetch_page(url):
def index_url(url, note=""):
+ url = clean_url(url)
title, body, links = fetch_page(url)
db = get_db()
cur = db.execute(
diff --git a/handlers.py b/handlers.py
index 4550e15..42944a5 100644
--- a/handlers.py
+++ b/handlers.py
@@ -1,7 +1,7 @@
import json
from datetime import datetime
-from db import get_db, get_setting, set_setting, get_site_name, index_url
+from db import get_db, get_setting, set_setting, get_site_name, index_url, clean_url
from templates import esc, snippet, wrap_page
from rns_client import fetch_remote_sites
@@ -46,6 +46,38 @@ def _error(status):
return _respond(f"
{status}
", status)
+# --- Tag helpers ---
+
+
+def _get_page_tags(page_id, db=None):
+    """Return the tag names attached to a page, sorted by name.
+
+    page_id: value matched against page_tags.page_id.
+    db: optional open connection to reuse; it is left open for the
+        caller. When omitted, a connection is obtained from get_db()
+        and closed before returning, even if the query raises.
+
+    Rows are read by column name ("name"), so the connection must
+    produce name-indexable rows.
+    NOTE(review): presumably get_db() sets a row factory - confirm.
+    """
+    owns_db = db is None
+    if owns_db:
+        db = get_db()
+    try:
+        rows = db.execute(
+            "SELECT t.name FROM tags t JOIN page_tags pt ON t.id = pt.tag_id "
+            "WHERE pt.page_id = ? ORDER BY t.name", (page_id,)
+        ).fetchall()
+    finally:
+        # Only close a connection this call opened itself.
+        if owns_db:
+            db.close()
+    return [r["name"] for r in rows]
+
+
+def _set_page_tags(page_id, tag_string, db=None):
+    """Replace a page's tags with those named in a comma-separated string.
+
+    page_id: value written to page_tags.page_id.
+    tag_string: comma-separated tag names. Each name is stripped and
+        lowercased; empty pieces are skipped. An empty string or None
+        removes all of the page's tags.
+    db: optional open connection to reuse. In that case nothing is
+        committed or closed here; the caller owns the transaction.
+        When omitted, a connection is obtained from get_db(), the
+        changes are committed on success, and the connection is always
+        closed.
+
+    Rows are read by column name ("id"), so the connection must
+    produce name-indexable rows.
+    NOTE(review): presumably get_db() sets a row factory - confirm.
+    """
+    owns_db = db is None
+    if owns_db:
+        db = get_db()
+    # dict.fromkeys de-duplicates while keeping first-seen order, so a
+    # repeated tag does not cost extra statements.
+    names = dict.fromkeys(
+        t.strip().lower() for t in (tag_string or "").split(",") if t.strip()
+    )
+    try:
+        db.execute("DELETE FROM page_tags WHERE page_id = ?", (page_id,))
+        for name in names:
+            # Create the tag if it is new, then look up its id either way.
+            db.execute("INSERT OR IGNORE INTO tags (name) VALUES (?)", (name,))
+            tag_id = db.execute("SELECT id FROM tags WHERE name = ?", (name,)).fetchone()["id"]
+            db.execute("INSERT OR IGNORE INTO page_tags (page_id, tag_id) VALUES (?, ?)", (page_id, tag_id))
+        if owns_db:
+            db.commit()
+    finally:
+        # Only close a connection this call opened itself.
+        if owns_db:
+            db.close()
+
+
# --- Route handlers ---
@@ -69,12 +101,17 @@ def handle_search(query):
note_html = ""
if r["note"]:
note_html = f'
{esc(r["note"])}
'
+ tags = _get_page_tags(r["id"], db)
+ tags_html = ""
+ if tags:
+ tag_links = " ".join(f'[{esc(t)}]' for t in tags)
+ tags_html = f'