tightened network defaults, squashed bugs

Security:
- Bind HTTP gateway to 127.0.0.1 by default; add --bind for LAN opt-in
- Restrict Reticulum mesh surface to GET /api/sites only (CSRF cannot
  authenticate mesh callers, so gate by whitelist)
- Cap request body size at 16 MiB to prevent memory DoS
- Redact /bookmark query strings from request logs so the bookmark token
  and URLs do not land in stdout / docker / journal logs
- Tighten FTS5 sanitizer: strip colon, drop AND/OR/NOT/NEAR operator words
- Expand .dockerignore; document trust model in README

Features:
- Add sharing mode toggle (share everything except private vs share only
  public-tagged) with /share/preview so users can see what subscribers
  would receive before enabling sharing

Bugs:
- handle_export() crashed on every call (missing query kwarg)
- Dead float16 decompression branch in embeddings.py silently corrupted
  the HNSW index when compress_embeddings was on
- GATEWAY_PORT staleness: --port and find_available_port had no effect
  on the actual bind
- semantic_search default mismatched between db.py ("1") and the rest of
  the app ("0"), causing embeddings to be generated when the UI said off
- Connection pool returned connections with uncommitted transactions to
  the next consumer
- Gateway POST body decode 502'd on non-UTF-8 input
- ensure_rns_config clobbered user-edited ~/.reticulum/config; now only
  rewrites files it authored (sentinel-tagged)
This commit is contained in:
lichenblankie 2026-04-23 15:37:45 -07:00
parent e3aadf3947
commit 8205db9bc3
8 changed files with 266 additions and 56 deletions

View file

@ -50,14 +50,18 @@ def _sanitize_fts_query(query):
if not words:
return '""'
tokens = []
last_idx = len(words) - 1
for i, w in enumerate(words):
# Strip FTS5 special characters to prevent injection
cleaned = re.sub(r'["\'\(\)\*\+\-\^~]', '', w).strip()
# Strip FTS5 special characters (operators, column filter colon) to prevent injection
cleaned = re.sub(r'["\'\(\)\*\+\-\^~:]', '', w).strip()
if not cleaned:
continue
if cleaned.lower() in _STOPWORDS:
continue
if i == len(words) - 1:
# Drop FTS5 operator words so they aren't parsed as operators on the unquoted last token
if cleaned.upper() in ("AND", "OR", "NOT", "NEAR"):
continue
if i == last_idx:
# Prefix match on the last token for partial word matching
tokens.append(f"{cleaned}*")
else:
@ -688,8 +692,11 @@ def handle_bookmark(query):
MAX_EXPORT = 10000
def handle_export():
batch = int((query or {}).get("batch", ["0"])[0])
def handle_export(query=None):
try:
batch = int((query or {}).get("batch", ["0"])[0])
except (TypeError, ValueError):
batch = 0
db = get_db()
try:
rows = db.execute(
@ -752,6 +759,10 @@ def handle_style_form(msg=""):
name = get_site_name()
sharing = get_setting("sharing_enabled", "0")
checked = " checked" if sharing == "1" else ""
sharing_mode = get_setting("sharing_mode", "exclude_private")
exclude_checked = " checked" if sharing_mode != "require_public" else ""
require_checked = " checked" if sharing_mode == "require_public" else ""
shared_count = _count_shared_pages()
semantic = get_setting("semantic_search", "0")
semantic_checked = " checked" if semantic == "1" else ""
reranker = get_setting("use_reranker", "0")
@ -784,7 +795,18 @@ def handle_style_form(msg=""):
f"<h2>sharing</h2>"
f'<label><input type="checkbox" name="sharing_enabled" value="1"{checked}>'
f" share your site list publicly at /api/sites</label><br>"
f"<small>Note: pages tagged: private will not be shared.</small><br><br>"
f'<div style="margin-top:0.6rem">'
f"<small>What to share:</small><br>"
f'<label><input type="radio" name="sharing_mode" value="exclude_private"{exclude_checked}>'
f' share all pages except those tagged <code>private</code></label><br>'
f'<label><input type="radio" name="sharing_mode" value="require_public"{require_checked}>'
f' share only pages tagged <code>public</code></label><br>'
f'<small>The <code>private</code> tag always excludes a page, even in public-only mode.</small>'
f'</div>'
f'<p style="margin-top:0.6rem">'
f'Currently sharing <b>{shared_count}</b> page(s). '
f'<a href="/share/preview">preview what subscribers would see</a>'
f'</p>'
f"<h2>mesh network</h2>"
f"<p>Choose how to connect to the mesh. You can enable both for maximum reach.</p>"
f"<h3>internet</h3>"
@ -868,6 +890,10 @@ def handle_style_submit(body):
template = body.get("template", [""])[0].replace("\r\n", "\n").replace("\r", "\n")
name = body.get("site_name", ["tinyweb"])[0].strip()
sharing = "1" if body.get("sharing_enabled") else "0"
sharing_mode = body.get("sharing_mode", ["exclude_private"])[0]
if sharing_mode not in ("exclude_private", "require_public"):
sharing_mode = "exclude_private"
set_setting("sharing_mode", sharing_mode)
semantic = "1" if body.get("semantic_search") else "0"
reranker = "1" if body.get("use_reranker") else "0"
compress = "1" if body.get("compress_embeddings") else "0"
@ -1008,6 +1034,123 @@ def handle_tag_browse(tag_name, query=None):
MAX_API_SITES = 5000
def _page_is_shared(tags, mode):
"""Decide whether a page with the given tags is shared under the given mode.
`private` always wins a page tagged private is never shared, regardless of mode.
"""
if "private" in tags:
return False
if mode == "require_public" and "public" not in tags:
return False
return True
def _shared_sites(db, since=""):
    """Build the site records a subscriber would be sent.

    `db` is an open connection owned by the caller; it is not released here.
    When `since` is non-empty only rows whose `last_modified` compares greater
    than it are considered. At most MAX_API_SITES rows are read (newest id
    first) before the sharing rule is applied, so the result may hold fewer
    entries than that cap.

    Returns a list of dicts with the keys url, title, note, tags and
    last_modified (an empty string when the column is empty).
    """
    mode = get_setting("sharing_mode", "exclude_private")

    # Pick the statement and its parameters first, then run a single execute.
    if since:
        sql = (
            "SELECT id, url, title, note, last_modified FROM pages "
            "WHERE last_modified > ? ORDER BY id DESC LIMIT ?"
        )
        params = (since, MAX_API_SITES)
    else:
        sql = "SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?"
        params = (MAX_API_SITES,)
    records = db.execute(sql, params).fetchall()

    shared = []
    for row in records:
        row_tags = _get_page_tags(row["id"], db)
        if _page_is_shared(row_tags, mode):
            shared.append({
                "url": row["url"],
                "title": row["title"],
                "note": row["note"],
                "tags": row_tags,
                "last_modified": row["last_modified"] or "",
            })
    return shared
def _shared_all_urls(db):
    """List the URLs of every currently shared page.

    Subscribers use this list to detect deletions. `db` is an open connection
    owned by the caller. Only the newest MAX_API_SITES rows (by id) are
    examined, and each is kept only if it passes the current sharing rule.
    """
    mode = get_setting("sharing_mode", "exclude_private")
    candidates = db.execute(
        "SELECT id, url FROM pages ORDER BY id DESC LIMIT ?", (MAX_API_SITES,)
    ).fetchall()

    urls = []
    for row in candidates:
        if _page_is_shared(_get_page_tags(row["id"], db), mode):
            urls.append(row["url"])
    return urls
def _count_shared_pages():
    """Return how many pages are shared under the current sharing rule.

    Used by the settings UI. The count is the length of the list produced by
    _shared_all_urls(), so the work done is that of building the full list.
    A connection is borrowed via get_db() and always handed back.
    """
    conn = get_db()
    try:
        shared_urls = _shared_all_urls(conn)
    finally:
        # Give the connection back even if the lookup raised.
        return_db(conn)
    return len(shared_urls)
def handle_share_preview():
    """Render the page listing what a subscriber would currently receive.

    Works regardless of whether sharing is enabled, so the user can inspect
    the exposed surface before flipping it on. The page shows the active
    rule, whether sharing is on, and either the matching pages or a note that
    nothing matches.
    """
    mode = get_setting("sharing_mode", "exclude_private")
    if mode == "require_public":
        mode_label = "only pages tagged <code>public</code>"
    else:
        mode_label = "all pages except those tagged <code>private</code>"

    if get_setting("sharing_enabled", "0") == "1":
        status = '<p>Sharing is <b>enabled</b>. Subscribers see the pages listed below.</p>'
    else:
        status = (
            '<p>Sharing is <b>disabled</b>. Nothing is actually being shared right now; '
            'this is the list that would be exposed if you enabled it.</p>'
        )

    conn = get_db()
    try:
        sites = _shared_sites(conn)
    finally:
        return_db(conn)

    # Pieces common to both the empty and the populated page.
    header = (
        "<h1>sharing preview</h1>"
        f"<p>Rule: {mode_label}.</p>"
        f"{status}"
    )
    footer = '<p><a href="/style">back to settings</a></p>'

    if not sites:
        return _respond(
            header
            + "<p><em>No pages match the current rule.</em></p>"
            + footer
        )

    items = []
    for site in sites:
        if site["tags"]:
            tags_html = " " + " ".join(f"[{esc(tag)}]" for tag in site["tags"])
        else:
            tags_html = ""
        if site["note"]:
            note_html = f' — <em>{esc(site["note"])}</em>'
        else:
            note_html = ""
        # Fall back to the URL as link text when the page has no title.
        link_text = site["title"] or site["url"]
        items.append(
            f'<li>'
            f'<a href="{esc(site["url"])}" rel="noreferrer noopener">{esc(link_text)}</a>'
            f'{note_html}{tags_html} '
            f'<br><small>{esc(site["url"])}</small>'
            f'</li>'
        )
    listing = "".join(items)

    return _respond(
        header
        + f"<p><b>{len(sites)}</b> page(s) visible to subscribers.</p>"
        + f"<ul>{listing}</ul>"
        + footer
    )
def handle_api_sites(query=None):
if get_setting("sharing_enabled", "0") != "1":
return _json_response(
@ -1018,33 +1161,8 @@ def handle_api_sites(query=None):
since = (query or {}).get("since", [""])[0].strip()
db = get_db()
try:
if since:
rows = db.execute(
"SELECT id, url, title, note, last_modified FROM pages "
"WHERE last_modified > ? ORDER BY id DESC LIMIT ?",
(since, MAX_API_SITES),
).fetchall()
else:
rows = db.execute(
"SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?",
(MAX_API_SITES,),
).fetchall()
sites = []
for r in rows:
tags = _get_page_tags(r["id"], db)
if "private" in tags:
continue # Skip pages tagged private from sharing
sites.append({
"url": r["url"], "title": r["title"], "note": r["note"],
"tags": tags, "last_modified": r["last_modified"] or "",
})
# Include list of all current URLs so subscriber can detect deletions (limited)
all_urls = None
if not since:
all_url_rows = db.execute(
"SELECT p.id, p.url FROM pages ORDER BY id DESC LIMIT ?", (MAX_API_SITES,)
).fetchall()
all_urls = [r["url"] for r in all_url_rows if "private" not in _get_page_tags(r["id"], db)]
sites = _shared_sites(db, since=since)
all_urls = _shared_all_urls(db) if not since else None
finally:
return_db(db)
data = {"name": get_site_name(), "sites": sites}
@ -1478,10 +1596,12 @@ def _dispatch_inner(data):
return handle_bookmark(query)
elif path == "/style":
return handle_style_form()
elif path == "/share/preview":
return handle_share_preview()
elif path == "/about":
return handle_about()
elif path == "/export":
return handle_export()
return handle_export(query)
elif path == "/import":
return handle_import_form()
elif path == "/tags":