Add Reticulum destination hash option to the add-page form

This commit is contained in:
lichenblankie 2026-03-30 22:36:58 +00:00
parent a1358c1f3d
commit b112ee3660
2 changed files with 44 additions and 19 deletions

17
db.py
View file

@ -131,7 +131,8 @@ def init_db():
" title TEXT," " title TEXT,"
" body TEXT," " body TEXT,"
" note TEXT DEFAULT ''," " note TEXT DEFAULT '',"
" last_modified TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%S','now'))" " last_modified TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%S','now')),"
" reticulum_dest TEXT DEFAULT ''"
")" ")"
) )
db.execute( db.execute(
@ -247,6 +248,11 @@ def init_db():
db.execute("ALTER TABLE pages ADD COLUMN summary TEXT DEFAULT ''") db.execute("ALTER TABLE pages ADD COLUMN summary TEXT DEFAULT ''")
db.commit() db.commit()
# Migrate pages: add reticulum_dest column if missing
if "reticulum_dest" not in page_cols:
db.execute("ALTER TABLE pages ADD COLUMN reticulum_dest TEXT DEFAULT ''")
db.commit()
# Chunks table for semantic search embeddings # Chunks table for semantic search embeddings
db.execute( db.execute(
"CREATE TABLE IF NOT EXISTS chunks (" "CREATE TABLE IF NOT EXISTS chunks ("
@ -359,19 +365,18 @@ def fetch_page(url):
def index_url(url, note=""): def index_url(url, note="", reticulum_dest=""):
url = clean_url(url) url = clean_url(url)
title, body, links, meta_desc = fetch_page(url) title, body, links, meta_desc = fetch_page(url)
# Use meta description if available and meaningful, otherwise generate from body
summary = meta_desc if meta_desc and len(meta_desc) > 20 else "" summary = meta_desc if meta_desc and len(meta_desc) > 20 else ""
db = get_db() db = get_db()
try: try:
now = __import__("datetime").datetime.now().strftime("%Y-%m-%dT%H:%M:%S") now = __import__("datetime").datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
db.execute( db.execute(
"INSERT INTO pages (url, title, body, note, last_modified, summary) VALUES (?, ?, ?, ?, ?, ?) " "INSERT INTO pages (url, title, body, note, last_modified, summary, reticulum_dest) VALUES (?, ?, ?, ?, ?, ?, ?) "
"ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, " "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, "
"note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary", "note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary, reticulum_dest=excluded.reticulum_dest",
(url, title, body, note, now, summary), (url, title, body, note, now, summary, reticulum_dest),
) )
page_id = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()[0] page_id = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()[0]
db.execute("DELETE FROM links WHERE page_id = ?", (page_id,)) db.execute("DELETE FROM links WHERE page_id = ?", (page_id,))

View file

@ -341,9 +341,12 @@ def handle_search(query):
def handle_add_form(msg=""): def handle_add_form(msg=""):
return _respond( return _respond(
f"<h1>add url</h1>" f"<h1>add url</h1>"
f"<p>Add a site via URL or Reticulum destination hash</p>"
f'<form method="post" action="/add">' f'<form method="post" action="/add">'
f'{_csrf_field()}' f'{_csrf_field()}'
f'<input name="url" placeholder="https://example.com" size="50"><br><br>' f'<input name="url" placeholder="https://example.com" size="50"><br>'
f'<small>or</small><br>'
f'<input name="reticulum_dest" placeholder="reticulum destination hash (32 hex chars)" size="50"><br><br>'
f'<input name="note" placeholder="why are you saving this? (optional)" size="50"><br><br>' f'<input name="note" placeholder="why are you saving this? (optional)" size="50"><br><br>'
f'<input name="tags" placeholder="tags (comma-separated, e.g. solarpunk, mesh)" size="50"><br><br>' f'<input name="tags" placeholder="tags (comma-separated, e.g. solarpunk, mesh)" size="50"><br><br>'
f'<button type="submit">index</button>' f'<button type="submit">index</button>'
@ -357,15 +360,25 @@ def handle_add_submit(body):
url = clean_url(body.get("url", [""])[0].strip()) url = clean_url(body.get("url", [""])[0].strip())
note = body.get("note", [""])[0].strip() note = body.get("note", [""])[0].strip()
tags = body.get("tags", [""])[0].strip() tags = body.get("tags", [""])[0].strip()
reticulum_dest = body.get("reticulum_dest", [""])[0].strip().replace("<", "").replace(">", "")
if not url: if reticulum_dest and url:
return handle_add_form("URL is required.") return handle_add_form("Please provide either a URL or a Reticulum destination hash, not both.")
if not url.startswith(("http://", "https://")):
if not url and not reticulum_dest:
return handle_add_form("URL or Reticulum destination hash is required.")
if reticulum_dest and (len(reticulum_dest) != 32 or not all(c in "0123456789abcdefABCDEF" for c in reticulum_dest)):
return handle_add_form("Invalid reticulum destination hash. Must be 32 hex characters.")
if url and not url.startswith(("http://", "https://")):
return handle_add_form("URL must start with http:// or https://") return handle_add_form("URL must start with http:// or https://")
# Try auto-index first if reticulum_dest and not url:
url = f"reticulum:{reticulum_dest}"
try: try:
title = index_url(url, note) title = index_url(url, note, reticulum_dest)
if tags: if tags:
db = get_db() db = get_db()
try: try:
@ -375,7 +388,9 @@ def handle_add_submit(body):
db.commit() db.commit()
finally: finally:
return_db(db) return_db(db)
return handle_add_form(f'Indexed: <a href="{esc(url)}">{esc(title)}</a>')
display_url = url if url.startswith("http") else reticulum_dest
return handle_add_form(f'Indexed: {esc(display_url)}')
except ValueError as e: except ValueError as e:
return handle_add_form(f"Error: {esc(str(e))}") return handle_add_form(f"Error: {esc(str(e))}")
@ -394,6 +409,7 @@ def handle_add_submit(body):
f'<input type="hidden" name="url" value="{esc(url)}">' f'<input type="hidden" name="url" value="{esc(url)}">'
f'<input type="hidden" name="note" value="{esc(note)}">' f'<input type="hidden" name="note" value="{esc(note)}">'
f'<input type="hidden" name="tags" value="{esc(tags)}">' f'<input type="hidden" name="tags" value="{esc(tags)}">'
f'<input type="hidden" name="reticulum_dest" value="{esc(reticulum_dest)}">'
f'<label>Title:</label><br>' f'<label>Title:</label><br>'
f'<input name="manual_title" size="50" placeholder="page title" required><br><br>' f'<input name="manual_title" size="50" placeholder="page title" required><br><br>'
f'<label>Description:</label><br>' f'<label>Description:</label><br>'
@ -409,11 +425,16 @@ def handle_add_manual_submit(body):
url = clean_url(body.get("url", [""])[0].strip()) url = clean_url(body.get("url", [""])[0].strip())
note = body.get("note", [""])[0].strip() note = body.get("note", [""])[0].strip()
tags = body.get("tags", [""])[0].strip() tags = body.get("tags", [""])[0].strip()
reticulum_dest = body.get("reticulum_dest", [""])[0].strip()
manual_title = body.get("manual_title", [""])[0].strip() manual_title = body.get("manual_title", [""])[0].strip()
manual_desc = body.get("manual_description", [""])[0].strip() manual_desc = body.get("manual_description", [""])[0].strip()
if not url: if not url and not reticulum_dest:
return handle_add_form("URL is required.") return handle_add_form("URL or Reticulum destination hash is required.")
if not url and reticulum_dest:
url = f"reticulum:{reticulum_dest}"
if not manual_title or not manual_desc: if not manual_title or not manual_desc:
return handle_add_form("Title and description are required for manual entry.") return handle_add_form("Title and description are required for manual entry.")
@ -421,12 +442,11 @@ def handle_add_manual_submit(body):
try: try:
now = __import__("datetime").datetime.now().strftime("%Y-%m-%dT%H:%M:%S") now = __import__("datetime").datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
# Insert the page
db.execute( db.execute(
"INSERT INTO pages (url, title, body, note, last_modified, summary) VALUES (?, ?, ?, ?, ?, ?) " "INSERT INTO pages (url, title, body, note, last_modified, summary, reticulum_dest) VALUES (?, ?, ?, ?, ?, ?, ?) "
"ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, " "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, "
"note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary", "note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary, reticulum_dest=excluded.reticulum_dest",
(url, manual_title, manual_desc, note, now, manual_desc[:200]), (url, manual_title, manual_desc, note, now, manual_desc[:200], reticulum_dest),
) )
# Get the page ID # Get the page ID