stripped tracking params, added tags
URLs are cleaned of tracking parameters (utm_*, fbclid, gclid, etc.) before indexing. Tags can be added when saving or editing pages, browsed at /tags, and are included in search results. Tags are shared via /api/sites and preserved when syncing/importing from subscriptions.
This commit is contained in:
parent
c5d8d350a6
commit
acfa9f6d4f
2 changed files with 192 additions and 27 deletions
180
handlers.py
180
handlers.py
|
|
@ -1,7 +1,7 @@
|
|||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from db import get_db, get_setting, set_setting, get_site_name, index_url
|
||||
from db import get_db, get_setting, set_setting, get_site_name, index_url, clean_url
|
||||
from templates import esc, snippet, wrap_page
|
||||
from rns_client import fetch_remote_sites
|
||||
|
||||
|
|
@ -46,6 +46,38 @@ def _error(status):
|
|||
return _respond(f"<h1>{status}</h1>", status)
|
||||
|
||||
|
||||
# --- Tag helpers ---
|
||||
|
||||
|
||||
def _get_page_tags(page_id, db=None):
    """Return the names of the tags attached to a page, sorted by name.

    Args:
        page_id: Value matched against ``page_tags.page_id``.
        db: Optional open connection to reuse. When omitted, a connection is
            obtained from ``get_db()`` and closed before returning.

    Returns:
        A list of tag-name strings; empty when the page has no tags.
    """
    owns_connection = db is None
    if owns_connection:
        db = get_db()
    try:
        rows = db.execute(
            "SELECT t.name FROM tags t JOIN page_tags pt ON t.id = pt.tag_id "
            "WHERE pt.page_id = ? ORDER BY t.name",
            (page_id,),
        ).fetchall()
    finally:
        # Close only a connection opened here, and do so even when the query
        # raises; a caller-supplied connection is left open for the caller.
        if owns_connection:
            db.close()
    return [r["name"] for r in rows]
|
||||
|
||||
|
||||
def _set_page_tags(page_id, tag_string, db=None):
    """Replace a page's tags with those parsed from a comma-separated string.

    Every existing ``page_tags`` row for the page is deleted first, then each
    name in ``tag_string`` is stripped, lower-cased, inserted into ``tags`` if
    not already present, and linked to the page.

    Args:
        page_id: Value written to ``page_tags.page_id``.
        tag_string: Comma-separated tag names. Blank entries are skipped; an
            empty string or ``None`` simply clears the page's tags.
        db: Optional open connection to reuse. When supplied, the caller is
            responsible for committing and closing it. When omitted, a
            connection is obtained from ``get_db()``, committed on success
            and always closed.
    """
    # dict.fromkeys drops repeated names while preserving first-seen order, so
    # each tag is written once regardless of how often it was typed.
    names = list(dict.fromkeys(
        part.strip().lower()
        for part in (tag_string or "").split(",")
        if part.strip()
    ))
    owns_connection = db is None
    if owns_connection:
        db = get_db()
    try:
        db.execute("DELETE FROM page_tags WHERE page_id = ?", (page_id,))
        for name in names:
            db.execute("INSERT OR IGNORE INTO tags (name) VALUES (?)", (name,))
            tag_id = db.execute(
                "SELECT id FROM tags WHERE name = ?", (name,)
            ).fetchone()["id"]
            db.execute(
                "INSERT OR IGNORE INTO page_tags (page_id, tag_id) VALUES (?, ?)",
                (page_id, tag_id),
            )
        if owns_connection:
            db.commit()
    finally:
        # Close only a connection opened here, even if a statement failed.
        if owns_connection:
            db.close()
|
||||
|
||||
|
||||
# --- Route handlers ---
|
||||
|
||||
|
||||
|
|
@ -69,12 +101,17 @@ def handle_search(query):
|
|||
note_html = ""
|
||||
if r["note"]:
|
||||
note_html = f'<div class="note"><em>{esc(r["note"])}</em></div>'
|
||||
tags = _get_page_tags(r["id"], db)
|
||||
tags_html = ""
|
||||
if tags:
|
||||
tag_links = " ".join(f'<a href="/tags/{esc(t)}" class="tag">[{esc(t)}]</a>' for t in tags)
|
||||
tags_html = f'<div class="tags">{tag_links}</div>'
|
||||
result_html += (
|
||||
f'<div class="result">'
|
||||
f'<a href="{esc(r["url"])}">{esc(r["title"])}</a><br>'
|
||||
f'<small>{esc(r["url"])}</small><br>'
|
||||
f'{esc(snippet(r["body"], q))}'
|
||||
f'{note_html}'
|
||||
f'{note_html}{tags_html}'
|
||||
f'</div>'
|
||||
)
|
||||
else:
|
||||
|
|
@ -157,6 +194,7 @@ def handle_search(query):
|
|||
f'<p>{count} page(s) indexed.'
|
||||
f' <a href="/add">+ add url</a>'
|
||||
f' | <a href="/pages">browse</a>'
|
||||
f' | <a href="/tags">tags</a>'
|
||||
f' | <a href="/subscriptions">subscriptions</a>'
|
||||
f' | <a href="/style">customize</a></p>'
|
||||
f'<hr>{result_html}{trusted_html}{remote_html}'
|
||||
|
|
@ -169,6 +207,7 @@ def handle_add_form(msg=""):
|
|||
f'<form method="post" action="/add">'
|
||||
f'<input name="url" placeholder="https://example.com" size="50"><br><br>'
|
||||
f'<input name="note" placeholder="why are you saving this? (optional)" size="50"><br><br>'
|
||||
f'<input name="tags" placeholder="tags (comma-separated, e.g. solarpunk, mesh)" size="50"><br><br>'
|
||||
f'<button type="submit">index</button>'
|
||||
f"</form>"
|
||||
f"<p>{msg}</p>"
|
||||
|
|
@ -177,14 +216,22 @@ def handle_add_form(msg=""):
|
|||
|
||||
|
||||
def handle_add_submit(body):
|
||||
url = body.get("url", [""])[0].strip()
|
||||
url = clean_url(body.get("url", [""])[0].strip())
|
||||
note = body.get("note", [""])[0].strip()
|
||||
tags = body.get("tags", [""])[0].strip()
|
||||
if not url:
|
||||
return handle_add_form("URL is required.")
|
||||
if not url.startswith(("http://", "https://")):
|
||||
return handle_add_form("URL must start with http:// or https://")
|
||||
try:
|
||||
title = index_url(url, note)
|
||||
if tags:
|
||||
db = get_db()
|
||||
row = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()
|
||||
if row:
|
||||
_set_page_tags(row["id"], tags, db)
|
||||
db.commit()
|
||||
db.close()
|
||||
return handle_add_form(f'Indexed: <a href="{esc(url)}">{esc(title)}</a>')
|
||||
except Exception as e:
|
||||
return handle_add_form(f"Error: {esc(str(e))}")
|
||||
|
|
@ -193,16 +240,21 @@ def handle_add_submit(body):
|
|||
def handle_pages():
|
||||
db = get_db()
|
||||
rows = db.execute("SELECT id, url, title, note FROM pages ORDER BY id DESC").fetchall()
|
||||
db.close()
|
||||
items = ""
|
||||
for r in rows:
|
||||
note_html = f' — <em>{esc(r["note"])}</em>' if r["note"] else ""
|
||||
tags = _get_page_tags(r["id"], db)
|
||||
tags_html = ""
|
||||
if tags:
|
||||
tag_links = " ".join(f'<a href="/tags/{esc(t)}">[{esc(t)}]</a>' for t in tags)
|
||||
tags_html = f' {tag_links}'
|
||||
items += (
|
||||
f'<li>{esc(r["title"])}{note_html} '
|
||||
f'<li>{esc(r["title"])}{note_html}{tags_html} '
|
||||
f'<small>(<a href="{esc(r["url"])}">{esc(r["url"])}</a>)</small> '
|
||||
f'<a href="/edit/{r["id"]}">edit</a> '
|
||||
f'<a href="/delete/{r["id"]}">remove</a></li>'
|
||||
)
|
||||
db.close()
|
||||
return _respond(
|
||||
f"<h1>indexed pages ({len(rows)})</h1>"
|
||||
f"<ul>{items}</ul>"
|
||||
|
|
@ -214,15 +266,18 @@ def handle_pages():
|
|||
def handle_edit_form(page_id, msg=""):
|
||||
db = get_db()
|
||||
row = db.execute("SELECT id, url, title, note FROM pages WHERE id = ?", (page_id,)).fetchone()
|
||||
db.close()
|
||||
if not row:
|
||||
db.close()
|
||||
return _error(404)
|
||||
tags = ", ".join(_get_page_tags(page_id, db))
|
||||
db.close()
|
||||
return _respond(
|
||||
f"<h1>edit note</h1>"
|
||||
f"<h1>edit page</h1>"
|
||||
f"<p><b>{esc(row['title'])}</b><br>"
|
||||
f"<small>{esc(row['url'])}</small></p>"
|
||||
f'<form method="post" action="/edit/{row["id"]}">'
|
||||
f'<input name="note" value="{esc(row["note"])}" placeholder="why did you save this?" size="50"><br><br>'
|
||||
f'<input name="tags" value="{esc(tags)}" placeholder="tags (comma-separated)" size="50"><br><br>'
|
||||
f'<button type="submit">save</button>'
|
||||
f"</form>"
|
||||
f"<p>{msg}</p>"
|
||||
|
|
@ -232,8 +287,10 @@ def handle_edit_form(page_id, msg=""):
|
|||
|
||||
def handle_edit_submit(page_id, body):
|
||||
note = body.get("note", [""])[0].strip()
|
||||
tags = body.get("tags", [""])[0].strip()
|
||||
db = get_db()
|
||||
db.execute("UPDATE pages SET note = ? WHERE id = ?", (note, page_id))
|
||||
_set_page_tags(page_id, tags, db)
|
||||
db.commit()
|
||||
db.close()
|
||||
return _redirect("/pages")
|
||||
|
|
@ -249,7 +306,7 @@ def handle_delete(page_id):
|
|||
|
||||
|
||||
def handle_bookmark(query):
|
||||
url = query.get("url", [""])[0].strip()
|
||||
url = clean_url(query.get("url", [""])[0].strip())
|
||||
if not url or not url.startswith(("http://", "https://")):
|
||||
return _text_response("error: invalid url", headers={"Access-Control-Allow-Origin": "*"})
|
||||
try:
|
||||
|
|
@ -355,6 +412,51 @@ def handle_style_submit(body):
|
|||
return handle_style_form("Saved.")
|
||||
|
||||
|
||||
def handle_tags():
    """Render the tag index: each tag that is attached to a page, with a count.

    Tags are listed alphabetically, each linking to ``/tags/<name>``. When no
    tag is attached to any page, a short hint is shown instead of the list.

    Returns:
        Whatever ``_respond`` returns for the assembled HTML body.
    """
    db = get_db()
    try:
        rows = db.execute(
            "SELECT t.name, COUNT(pt.page_id) AS cnt FROM tags t "
            "JOIN page_tags pt ON t.id = pt.tag_id "
            "GROUP BY t.id ORDER BY t.name"
        ).fetchall()
    finally:
        db.close()

    items = "".join(
        f'<li><a href="/tags/{esc(r["name"])}">{esc(r["name"])}</a> ({r["cnt"]})</li>'
        for r in rows
    )

    # Choose the list-or-hint fragment in its own statement. Adjacent string
    # literals are concatenated before a conditional expression is evaluated,
    # so putting ``if items else`` inline between them would drop the heading
    # from one branch and the back link from the other.
    if items:
        listing = f"<ul>{items}</ul>"
    else:
        listing = "<p>No tags yet. Add tags when saving or editing pages.</p>"

    return _respond(
        f"<h1>tags</h1>"
        f"{listing}"
        f'<a href="/">back</a>'
    )
|
||||
|
||||
|
||||
def handle_tag_browse(tag_name):
    """Render the list of pages carrying a given tag, newest page id first.

    Each entry shows the page title, its note (if any), links for all of the
    page's tags, and the page URL.

    Args:
        tag_name: Tag name matched exactly against ``tags.name``.

    Returns:
        Whatever ``_respond`` returns for the assembled HTML body. An unknown
        tag yields a page reporting ``0 page(s)``.
    """
    db = get_db()
    try:
        rows = db.execute(
            "SELECT p.id, p.url, p.title, p.note FROM pages p "
            "JOIN page_tags pt ON p.id = pt.page_id "
            "JOIN tags t ON t.id = pt.tag_id "
            "WHERE t.name = ? ORDER BY p.id DESC",
            (tag_name,),
        ).fetchall()

        entries = []
        for r in rows:
            note_html = f' — <em>{esc(r["note"])}</em>' if r["note"] else ""
            # Reuse the open connection for the per-page tag lookup.
            tags = _get_page_tags(r["id"], db)
            tag_links = " ".join(
                f'<a href="/tags/{esc(t)}">[{esc(t)}]</a>' for t in tags
            )
            entries.append(
                f'<li>{esc(r["title"])}{note_html} {tag_links} '
                f'<small>(<a href="{esc(r["url"])}">{esc(r["url"])}</a>)</small></li>'
            )
    finally:
        # Release the connection even if a query or row lookup fails.
        db.close()

    items = "".join(entries)
    return _respond(
        f'<h1>tag: {esc(tag_name)}</h1>'
        f'<p>{len(rows)} page(s)</p>'
        f'<ul>{items}</ul>'
        f'<a href="/tags">all tags</a> | <a href="/">back</a>'
    )
|
||||
|
||||
|
||||
def handle_api_sites():
|
||||
if get_setting("sharing_enabled", "0") != "1":
|
||||
return _json_response(
|
||||
|
|
@ -363,12 +465,13 @@ def handle_api_sites():
|
|||
headers={"Access-Control-Allow-Origin": "*"},
|
||||
)
|
||||
db = get_db()
|
||||
rows = db.execute("SELECT url, title, note FROM pages ORDER BY id DESC").fetchall()
|
||||
rows = db.execute("SELECT id, url, title, note FROM pages ORDER BY id DESC").fetchall()
|
||||
sites = []
|
||||
for r in rows:
|
||||
tags = _get_page_tags(r["id"], db)
|
||||
sites.append({"url": r["url"], "title": r["title"], "note": r["note"], "tags": tags})
|
||||
db.close()
|
||||
data = {
|
||||
"name": get_site_name(),
|
||||
"sites": [{"url": r["url"], "title": r["title"], "note": r["note"]} for r in rows],
|
||||
}
|
||||
data = {"name": get_site_name(), "sites": sites}
|
||||
return _json_response(data, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
|
|
@ -455,13 +558,16 @@ def handle_subscription_browse(sub_id):
|
|||
|
||||
# Use locally synced data if available, otherwise fetch live
|
||||
remote_rows = db.execute(
|
||||
"SELECT url, title, note FROM remote_pages WHERE subscription_id = ?",
|
||||
"SELECT url, title, note, tags FROM remote_pages WHERE subscription_id = ?",
|
||||
(sub_id,),
|
||||
).fetchall()
|
||||
db.close()
|
||||
|
||||
if remote_rows:
|
||||
sites = [{"url": r["url"], "title": r["title"], "note": r["note"]} for r in remote_rows]
|
||||
sites = []
|
||||
for r in remote_rows:
|
||||
tags = [t for t in r["tags"].split(",") if t] if r["tags"] else []
|
||||
sites.append({"url": r["url"], "title": r["title"], "note": r["note"], "tags": tags})
|
||||
else:
|
||||
try:
|
||||
data = fetch_remote_sites(sub["dest_hash"])
|
||||
|
|
@ -483,9 +589,12 @@ def handle_subscription_browse(sub_id):
|
|||
else:
|
||||
new_count += 1
|
||||
note_html = f' — <em>{esc(s["note"])}</em>' if s.get("note") else ""
|
||||
tags_html = ""
|
||||
if s.get("tags"):
|
||||
tags_html = " " + " ".join(f'[{esc(t)}]' for t in s["tags"])
|
||||
new_items += (
|
||||
f'<li><label><input type="checkbox" name="urls" value="{esc(s["url"])}">'
|
||||
f' {esc(s["title"])}{note_html}'
|
||||
f' {esc(s["title"])}{note_html}{tags_html}'
|
||||
f' <small>({esc(s["url"])})</small></label></li>'
|
||||
)
|
||||
|
||||
|
|
@ -509,16 +618,19 @@ def handle_subscription_pick(body):
|
|||
sub_id = body.get("sub_id", [""])[0]
|
||||
import_all = body.get("import_all", [""])[0]
|
||||
|
||||
# Build a url->tags map from remote_pages for this subscription
|
||||
db = get_db()
|
||||
remote_rows = db.execute(
|
||||
"SELECT url, tags FROM remote_pages WHERE subscription_id = ?", (sub_id,)
|
||||
).fetchall()
|
||||
remote_tags = {r["url"]: r["tags"] for r in remote_rows}
|
||||
|
||||
if import_all:
|
||||
db = get_db()
|
||||
local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages").fetchall())
|
||||
remote = db.execute(
|
||||
"SELECT url FROM remote_pages WHERE subscription_id = ?", (sub_id,)
|
||||
).fetchall()
|
||||
db.close()
|
||||
urls = [r["url"] for r in remote if r["url"] not in local_urls]
|
||||
urls = [r["url"] for r in remote_rows if r["url"] not in local_urls]
|
||||
else:
|
||||
urls = body.get("urls", [])
|
||||
db.close()
|
||||
|
||||
if not urls:
|
||||
return handle_subscriptions("No sites selected.")
|
||||
|
|
@ -528,6 +640,15 @@ def handle_subscription_pick(body):
|
|||
for url in urls:
|
||||
try:
|
||||
index_url(url)
|
||||
# Import tags from the remote page
|
||||
tags_str = remote_tags.get(url, "")
|
||||
if tags_str:
|
||||
db = get_db()
|
||||
row = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()
|
||||
if row:
|
||||
_set_page_tags(row["id"], tags_str, db)
|
||||
db.commit()
|
||||
db.close()
|
||||
imported += 1
|
||||
except Exception:
|
||||
errors += 1
|
||||
|
|
@ -556,9 +677,10 @@ def handle_subscription_sync(sub_id):
|
|||
synced = 0
|
||||
for s in sites:
|
||||
try:
|
||||
tags_str = ",".join(s.get("tags", []))
|
||||
db.execute(
|
||||
"INSERT INTO remote_pages (subscription_id, url, title, note) VALUES (?, ?, ?, ?)",
|
||||
(sub_id, s["url"], s["title"], s.get("note", "")),
|
||||
"INSERT INTO remote_pages (subscription_id, url, title, note, tags) VALUES (?, ?, ?, ?, ?)",
|
||||
(sub_id, s["url"], s["title"], s.get("note", ""), tags_str),
|
||||
)
|
||||
synced += 1
|
||||
except Exception:
|
||||
|
|
@ -602,9 +724,10 @@ def handle_subscription_syncall():
|
|||
db.execute("DELETE FROM remote_pages WHERE subscription_id = ?", (sub["id"],))
|
||||
for s in sites:
|
||||
try:
|
||||
tags_str = ",".join(s.get("tags", []))
|
||||
db.execute(
|
||||
"INSERT INTO remote_pages (subscription_id, url, title, note) VALUES (?, ?, ?, ?)",
|
||||
(sub["id"], s["url"], s["title"], s.get("note", "")),
|
||||
"INSERT INTO remote_pages (subscription_id, url, title, note, tags) VALUES (?, ?, ?, ?, ?)",
|
||||
(sub["id"], s["url"], s["title"], s.get("note", ""), tags_str),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -655,6 +778,11 @@ def dispatch_request(data):
|
|||
return handle_export()
|
||||
elif path == "/import":
|
||||
return handle_import_form()
|
||||
elif path == "/tags":
|
||||
return handle_tags()
|
||||
elif path.startswith("/tags/"):
|
||||
tag_name = path[len("/tags/"):]
|
||||
return handle_tag_browse(tag_name) if tag_name else _error(400)
|
||||
elif path == "/api/sites":
|
||||
return handle_api_sites()
|
||||
elif path == "/subscriptions":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue