Harden network and privacy defaults; fix several bugs

Security:
- Bind HTTP gateway to 127.0.0.1 by default; add --bind for LAN opt-in
- Restrict Reticulum mesh surface to GET /api/sites only (CSRF cannot
  authenticate mesh callers, so gate by whitelist)
- Cap request body size at 16 MiB to prevent memory DoS
- Redact /bookmark query strings from request logs so the bookmark token
  and URLs do not land in stdout / docker / journal logs
- Tighten FTS5 sanitizer: strip colon, drop AND/OR/NOT/NEAR operator words
- Expand .dockerignore; document trust model in README

Features:
- Add sharing mode toggle (share everything except private vs share only
  public-tagged) with /share/preview so users can see what subscribers
  would receive before enabling sharing

Bugs:
- handle_export() crashed on every call (missing query kwarg)
- Dead float16 decompression branch in embeddings.py silently corrupted
  the HNSW index when compress_embeddings was on
- GATEWAY_PORT staleness: --port and find_available_port had no effect
  on the actual bind
- semantic_search default mismatched between db.py ("1") and the rest of
  the app ("0"), causing embeddings to be generated when the UI said off
- Connection pool returned connections with uncommitted transactions to
  the next consumer
- Gateway POST body decode 502'd on non-UTF-8 input
- ensure_rns_config clobbered user-edited ~/.reticulum/config; now only
  rewrites files it authored (sentinel-tagged)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Derick Phan 2026-04-23 15:37:45 -07:00
parent ce50150363
commit 1bc695f508
No known key found for this signature in database
8 changed files with 266 additions and 56 deletions

View file

@ -1,5 +1,15 @@
__pycache__/
**/__pycache__/
*.pyc
index.db*
index.hnsw
tinyweb_identity
.git/
.gitignore
*.md
.env
.env.*
.venv/
venv/
models/
.DS_Store

View file

@ -122,10 +122,13 @@ Or with docker-compose (see above) — data persists in the named volume.
### Command line options
```bash
./TinyWeb --version # Show version
./TinyWeb -p 9000 # Use port 9000 instead of default 8080
./TinyWeb --version # Show version
./TinyWeb -p 9000 # Use port 9000 instead of default 8080
./TinyWeb --bind 0.0.0.0 # Expose the web UI to your LAN (see warning below)
```
By default, the web UI binds to `127.0.0.1` and is only reachable from the machine running TinyWeb. **The UI has no authentication** — anyone who can reach the port can read, add, and delete entries, and change settings. Only pass `--bind 0.0.0.0` if you fully trust your network, or put TinyWeb behind an authenticating reverse proxy.
## Getting started
```bash
@ -133,7 +136,7 @@ pip install -r requirements.txt
python app.py
```
This starts the Reticulum server and an HTTP gateway on `http://localhost:8080`. Open it in your browser.
This starts the Reticulum server and an HTTP gateway on `http://127.0.0.1:8080`. Open it in your browser. The UI is localhost-only by default; see `--bind` under *Command line options* if you want to reach it from another machine.
Your destination hash is printed on startup — share it with friends so they can subscribe to your index.
@ -168,7 +171,9 @@ themes/ — Saved HTML templates (e.g. kodama.html)
## Security
TinyWeb includes several hardening measures:
**The web UI has no authentication.** It is bound to `127.0.0.1` by default, so only processes on the local machine can reach it. If you pass `--bind 0.0.0.0` (or run inside a container with a published port), anyone who can reach that address can fully control your instance — reading private entries, changing settings, and modifying the HTML template (which runs in your browser). Put TinyWeb behind a reverse proxy with auth before exposing it beyond localhost.
Other hardening measures:
- **CSRF protection** — All POST forms use per-session tokens via double-submit cookies
- **SSRF prevention** — URL fetching validates hostnames against private IP ranges, with redirect re-validation

62
app.py
View file

@ -8,7 +8,8 @@ from http.server import HTTPServer
from db import init_db, get_setting, set_setting
from handlers import dispatch_request
from gateway import GatewayState, GatewayHandler, GATEWAY_PORT
import gateway
from gateway import GatewayState, GatewayHandler
APP_NAME = "tinyweb"
ASPECTS = ["server"]
@ -24,13 +25,13 @@ def get_transport_config():
return host, int(port)
def find_available_port(start=8080, max_attempts=20):
def find_available_port(start=8080, max_attempts=20, host="127.0.0.1"):
"""Find an available port starting from start."""
import socket
for port in range(start, start + max_attempts):
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("0.0.0.0", port))
s.bind((host, port))
return port
except OSError:
continue
@ -71,16 +72,32 @@ def load_or_create_identity():
return identity
# Remote peers on the Reticulum mesh can only reach a narrow, read-only surface.
# Any other method/path is rejected here — CSRF cannot authenticate mesh callers
# (the attacker controls both the "cookie" and the "form" side of the check), so
# gating by whitelist is the only safe option.
_RNS_ALLOWED = {("GET", "/api/sites")}


def _rns_forbidden():
    """Build the 403 response returned for anything outside the mesh whitelist."""
    return {
        "status": 403,
        "content_type": "text/plain; charset=utf-8",
        "body": "Forbidden: this endpoint is not available over Reticulum.",
        "headers": {},
    }


def rns_request_handler(path, data, request_id, link_id, remote_identity, requested_at):
    """Serve a request that arrived over a Reticulum link.

    Only the (method, path) pairs listed in `_RNS_ALLOWED` are forwarded to
    `dispatch_request`; everything else gets a 403 response dict.

    `path`, `request_id`, `link_id`, `remote_identity` and `requested_at` are
    accepted for the handler signature but are not read here — routing uses the
    "method" and "path" keys inside `data`.

    `data` is treated as untrusted: a payload that is not a dict, or whose
    "method"/"path" values are not strings, is rejected with the same 403
    instead of raising (`.get` on a non-dict, or hashing a list for the
    whitelist lookup, would otherwise throw).
    """
    if data is None:
        # No payload: behave like a bare "GET /", which is not whitelisted.
        data = {"method": "GET", "path": "/", "query": {}, "body": {}, "gateway_host": ""}
    if not isinstance(data, dict):
        return _rns_forbidden()

    method = data.get("method", "GET")
    req_path = data.get("path", "/")
    # Non-string values can never match the whitelist and may be unhashable.
    if not (isinstance(method, str) and isinstance(req_path, str)):
        return _rns_forbidden()
    if (method, req_path) not in _RNS_ALLOWED:
        return _rns_forbidden()
    return dispatch_request(data)
def start_gateway(reticulum):
def start_gateway(reticulum, bind_host="127.0.0.1"):
GatewayState.reticulum = reticulum
GatewayState.local_dispatch = dispatch_request
server = HTTPServer(("0.0.0.0", GATEWAY_PORT), GatewayHandler)
server = HTTPServer((bind_host, gateway.GATEWAY_PORT), GatewayHandler)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
@ -126,7 +143,21 @@ def ensure_rns_config(config_dir, transport_host=None, transport_port=None):
if transport_port is None:
transport_port = int(get_setting("transport_port", str(DEFAULT_TRANSPORT_PORT)))
managed_sentinel = "# managed by tinyweb"
if os.path.exists(config_file):
try:
with open(config_file) as f:
existing = f.read()
except OSError:
existing = ""
if managed_sentinel not in existing:
# User-authored config — don't clobber it.
if not _config_settings_match(config_file, transport_host, transport_port):
print(
f"Warning: {config_file} was not created by tinyweb; "
"leaving it alone. Edit it manually to change transport/LoRa settings."
)
return
if _config_settings_match(config_file, transport_host, transport_port):
return
@ -164,7 +195,8 @@ def ensure_rns_config(config_dir, transport_host=None, transport_port=None):
os.makedirs(config_dir, exist_ok=True)
with open(config_file, "w") as f:
f.write(f"""[reticulum]
f.write(f"""{managed_sentinel}
[reticulum]
enable_transport = False
share_instance = No
@ -201,15 +233,20 @@ def main():
parser = argparse.ArgumentParser(prog="tinyweb", description="Personal decentralized search engine")
parser.add_argument("--version", "-v", action="store_true", help="Show version")
parser.add_argument("--port", "-p", type=int, default=None, help="HTTP gateway port (default: 8080)")
parser.add_argument(
"--bind", "-b", default="127.0.0.1",
help="Address to bind the HTTP gateway to (default: 127.0.0.1). "
"Use 0.0.0.0 to expose to the LAN; note that the web UI has no authentication.",
)
args = parser.parse_args()
if args.version:
print(f"TinyWeb {get_version()}")
return
bind_host = args.bind
port = args.port or 8080
import gateway
gateway.GATEWAY_PORT = find_available_port(port)
gateway.GATEWAY_PORT = find_available_port(port, host=bind_host)
init_db()
transport_host = get_setting("transport_host", DEFAULT_TRANSPORT_HOST)
@ -238,10 +275,15 @@ def main():
time.sleep(2)
destination.announce()
set_setting("dest_hash", destination.hash.hex())
start_gateway(reticulum)
start_gateway(reticulum, bind_host=bind_host)
print(f"TinyWeb running!")
print(f"Open http://localhost:{GATEWAY_PORT} in your browser")
if bind_host in ("0.0.0.0", "::"):
print(f"Open http://localhost:{gateway.GATEWAY_PORT} in your browser")
print(f"WARNING: listening on {bind_host} — the web UI has no authentication. "
"Anyone on your network can control this instance.")
else:
print(f"Open http://{bind_host}:{gateway.GATEWAY_PORT} in your browser")
print(f"Destination hash: {RNS.prettyhexrep(destination.hash)} (share this so friends can subscribe)")
while True:

10
db.py
View file

@ -123,6 +123,14 @@ def get_db():
def return_db(db):
try:
db.rollback()
except Exception:
try:
db.close()
except Exception:
pass
return
with _pool_lock:
if len(_pool) < _POOL_SIZE:
_pool.append(db)
@ -412,7 +420,7 @@ def index_url(url, note="", reticulum_dest=""):
(page_id, href, label),
)
db.commit()
if get_setting("semantic_search", "1") == "1":
if get_setting("semantic_search", "0") == "1":
try:
from embeddings import store_embeddings
store_embeddings(page_id, title, body, db)

View file

@ -261,6 +261,13 @@ def _decompress(embeddings):
return embeddings
def _blob_to_vec(buf):
    """Turn a stored embedding blob into a float32 vector.

    The element type is not recorded with the blob, so it is inferred from the
    byte length: exactly ``DIMS * 2`` bytes is read as float16 and widened to
    float32; any other length is read directly as float32.
    """
    stored_as_half = len(buf) == DIMS * 2
    if not stored_as_half:
        return np.frombuffer(buf, dtype=np.float32)
    half_vec = np.frombuffer(buf, dtype=np.float16)
    return half_vec.astype(np.float32)
# ---------------------------------------------------------------------------
# HNSW index management
# ---------------------------------------------------------------------------
@ -294,9 +301,7 @@ def build_index(db=None):
(BATCH_SIZE, offset),
).fetchall()
for r in rows:
emb = np.frombuffer(r["embedding"], dtype=np.float32)
if emb.dtype == np.float16:
emb = emb.astype(np.float32)
emb = _blob_to_vec(r["embedding"])
all_ids.append(r["id"])
all_embeddings.append(emb)
finally:

View file

@ -30,4 +30,5 @@ EOF
fi
export RNS_CONFIG_DIR="$CONFIG_DIR"
exec python app.py
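# Bind to 0.0.0.0 inside the container so Docker's published port can reach the
# gateway. Host-side exposure is decided by the port mapping: publish it as
# 127.0.0.1:8080:8080 to keep the unauthenticated UI reachable from the host only.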
exec python app.py --bind 0.0.0.0 "$@"

View file

@ -1,3 +1,4 @@
import re
import sys
import time
import threading
@ -9,6 +10,7 @@ APP_NAME = "tinyweb"
ASPECTS = ["server"]
GATEWAY_PORT = 8080
REQUEST_TIMEOUT = 60
MAX_BODY_SIZE = 16 * 1024 * 1024 # 16 MiB — covers /import and every other form
class GatewayState:
@ -71,8 +73,18 @@ class GatewayHandler(BaseHTTPRequestHandler):
body = {}
if method == "POST":
length = int(self.headers.get("Content-Length", 0))
raw = self.rfile.read(length).decode()
try:
length = int(self.headers.get("Content-Length", 0))
except ValueError:
self.send_error(400, "Invalid Content-Length")
return
if length < 0:
self.send_error(400, "Invalid Content-Length")
return
if length > MAX_BODY_SIZE:
self.send_error(413, "Request body too large")
return
raw = self.rfile.read(length).decode("utf-8", errors="replace")
body = parse_qs(raw)
# Parse cookies
@ -152,7 +164,14 @@ class GatewayHandler(BaseHTTPRequestHandler):
self._forward("POST")
def log_message(self, format, *args):
print(f"[Gateway] {args[0]}")
try:
msg = format % args
except TypeError:
msg = format
# /bookmark carries a long-lived token and the URL being indexed —
# redact the query so it doesn't end up in stdout, journald, docker logs, etc.
msg = re.sub(r'(/bookmark)\?\S*', r'\1?[redacted]', msg)
print(f"[Gateway] {msg}")
def main():

View file

@ -50,14 +50,18 @@ def _sanitize_fts_query(query):
if not words:
return '""'
tokens = []
last_idx = len(words) - 1
for i, w in enumerate(words):
# Strip FTS5 special characters to prevent injection
cleaned = re.sub(r'["\'\(\)\*\+\-\^~]', '', w).strip()
# Strip FTS5 special characters (operators, column filter colon) to prevent injection
cleaned = re.sub(r'["\'\(\)\*\+\-\^~:]', '', w).strip()
if not cleaned:
continue
if cleaned.lower() in _STOPWORDS:
continue
if i == len(words) - 1:
# Drop FTS5 operator words so they aren't parsed as operators on the unquoted last token
if cleaned.upper() in ("AND", "OR", "NOT", "NEAR"):
continue
if i == last_idx:
# Prefix match on the last token for partial word matching
tokens.append(f"{cleaned}*")
else:
@ -688,8 +692,11 @@ def handle_bookmark(query):
MAX_EXPORT = 10000
def handle_export():
batch = int((query or {}).get("batch", ["0"])[0])
def handle_export(query=None):
try:
batch = int((query or {}).get("batch", ["0"])[0])
except (TypeError, ValueError):
batch = 0
db = get_db()
try:
rows = db.execute(
@ -752,6 +759,10 @@ def handle_style_form(msg=""):
name = get_site_name()
sharing = get_setting("sharing_enabled", "0")
checked = " checked" if sharing == "1" else ""
sharing_mode = get_setting("sharing_mode", "exclude_private")
exclude_checked = " checked" if sharing_mode != "require_public" else ""
require_checked = " checked" if sharing_mode == "require_public" else ""
shared_count = _count_shared_pages()
semantic = get_setting("semantic_search", "0")
semantic_checked = " checked" if semantic == "1" else ""
reranker = get_setting("use_reranker", "0")
@ -784,7 +795,18 @@ def handle_style_form(msg=""):
f"<h2>sharing</h2>"
f'<label><input type="checkbox" name="sharing_enabled" value="1"{checked}>'
f" share your site list publicly at /api/sites</label><br>"
f"<small>Note: pages tagged: private will not be shared.</small><br><br>"
f'<div style="margin-top:0.6rem">'
f"<small>What to share:</small><br>"
f'<label><input type="radio" name="sharing_mode" value="exclude_private"{exclude_checked}>'
f' share all pages except those tagged <code>private</code></label><br>'
f'<label><input type="radio" name="sharing_mode" value="require_public"{require_checked}>'
f' share only pages tagged <code>public</code></label><br>'
f'<small>The <code>private</code> tag always excludes a page, even in public-only mode.</small>'
f'</div>'
f'<p style="margin-top:0.6rem">'
f'Currently sharing <b>{shared_count}</b> page(s). '
f'<a href="/share/preview">preview what subscribers would see</a>'
f'</p>'
f"<h2>mesh network</h2>"
f"<p>Choose how to connect to the mesh. You can enable both for maximum reach.</p>"
f"<h3>internet</h3>"
@ -868,6 +890,10 @@ def handle_style_submit(body):
template = body.get("template", [""])[0].replace("\r\n", "\n").replace("\r", "\n")
name = body.get("site_name", ["tinyweb"])[0].strip()
sharing = "1" if body.get("sharing_enabled") else "0"
sharing_mode = body.get("sharing_mode", ["exclude_private"])[0]
if sharing_mode not in ("exclude_private", "require_public"):
sharing_mode = "exclude_private"
set_setting("sharing_mode", sharing_mode)
semantic = "1" if body.get("semantic_search") else "0"
reranker = "1" if body.get("use_reranker") else "0"
compress = "1" if body.get("compress_embeddings") else "0"
@ -1008,6 +1034,123 @@ def handle_tag_browse(tag_name, query=None):
MAX_API_SITES = 5000
def _page_is_shared(tags, mode):
"""Decide whether a page with the given tags is shared under the given mode.
`private` always wins a page tagged private is never shared, regardless of mode.
"""
if "private" in tags:
return False
if mode == "require_public" and "public" not in tags:
return False
return True
def _shared_sites(db, since=""):
    """Collect the full site records a subscriber would receive.

    At most MAX_API_SITES pages are read, newest id first; when `since` is
    non-empty only pages whose last_modified is greater than it are read.
    Each page is then kept or dropped by `_page_is_shared` under the current
    "sharing_mode" setting.

    The caller owns `db`; it is neither closed nor returned to the pool here.
    """
    sharing_mode = get_setting("sharing_mode", "exclude_private")
    if not since:
        cursor = db.execute(
            "SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?",
            (MAX_API_SITES,),
        )
    else:
        cursor = db.execute(
            "SELECT id, url, title, note, last_modified FROM pages "
            "WHERE last_modified > ? ORDER BY id DESC LIMIT ?",
            (since, MAX_API_SITES),
        )

    visible = []
    # Materialise the rows first: the per-page tag lookup reuses the same connection.
    for record in cursor.fetchall():
        page_tags = _get_page_tags(record["id"], db)
        if _page_is_shared(page_tags, sharing_mode):
            visible.append({
                "url": record["url"], "title": record["title"], "note": record["note"],
                "tags": page_tags, "last_modified": record["last_modified"] or "",
            })
    return visible
def _shared_all_urls(db):
    """List the URLs of currently shared pages (subscribers use it to detect deletions).

    Reads at most MAX_API_SITES pages, newest id first, and keeps those that
    `_page_is_shared` accepts under the current "sharing_mode" setting.
    """
    sharing_mode = get_setting("sharing_mode", "exclude_private")
    candidates = db.execute(
        "SELECT id, url FROM pages ORDER BY id DESC LIMIT ?", (MAX_API_SITES,)
    ).fetchall()
    shared_urls = []
    for row in candidates:
        if _page_is_shared(_get_page_tags(row["id"], db), sharing_mode):
            shared_urls.append(row["url"])
    return shared_urls
def _count_shared_pages():
    """Count the pages shared under the current sharing rule (shown in the settings UI).

    The count is the length of `_shared_all_urls`, so it is capped by that
    function's MAX_API_SITES row limit and costs one tag lookup per page read.
    The pooled connection is always handed back, even if the query fails.
    """
    conn = get_db()
    try:
        shared_urls = _shared_all_urls(conn)
        return len(shared_urls)
    finally:
        return_db(conn)
def _share_preview_item(site):
    """Render one shared-site record as an escaped <li> for the preview page."""
    tags_html = ""
    if site["tags"]:
        tags_html = " " + " ".join(f"[{esc(t)}]" for t in site["tags"])
    note_html = f' — <em>{esc(site["note"])}</em>' if site["note"] else ""
    return (
        f'<li>'
        f'<a href="{esc(site["url"])}" rel="noreferrer noopener">{esc(site["title"] or site["url"])}</a>'
        f'{note_html}{tags_html} '
        f'<br><small>{esc(site["url"])}</small>'
        f'</li>'
    )


def handle_share_preview():
    """Show the list of pages a subscriber would currently receive.

    Works regardless of whether sharing is enabled — this lets the user see
    the exposed surface before flipping it on. The page states the active
    sharing rule, whether sharing is enabled, and either the matching pages or
    a note that nothing matches.
    """
    mode = get_setting("sharing_mode", "exclude_private")
    mode_label = (
        "only pages tagged <code>public</code>"
        if mode == "require_public"
        else "all pages except those tagged <code>private</code>"
    )
    sharing_on = get_setting("sharing_enabled", "0") == "1"
    status = (
        '<p>Sharing is <b>enabled</b>. Subscribers see the pages listed below.</p>'
        if sharing_on else
        '<p>Sharing is <b>disabled</b>. Nothing is actually being shared right now; '
        'this is the list that would be exposed if you enabled it.</p>'
    )

    db = get_db()
    try:
        sites = _shared_sites(db)
    finally:
        return_db(db)

    if sites:
        # Join once instead of growing a string inside the loop.
        items = "".join(_share_preview_item(s) for s in sites)
        listing = (
            f"<p><b>{len(sites)}</b> page(s) visible to subscribers.</p>"
            f"<ul>{items}</ul>"
        )
    else:
        listing = "<p><em>No pages match the current rule.</em></p>"

    # Header and footer are shared by the empty and non-empty cases.
    body = (
        "<h1>sharing preview</h1>"
        f"<p>Rule: {mode_label}.</p>"
        f"{status}"
        f"{listing}"
        '<p><a href="/style">back to settings</a></p>'
    )
    return _respond(body)
def handle_api_sites(query=None):
if get_setting("sharing_enabled", "0") != "1":
return _json_response(
@ -1018,33 +1161,8 @@ def handle_api_sites(query=None):
since = (query or {}).get("since", [""])[0].strip()
db = get_db()
try:
if since:
rows = db.execute(
"SELECT id, url, title, note, last_modified FROM pages "
"WHERE last_modified > ? ORDER BY id DESC LIMIT ?",
(since, MAX_API_SITES),
).fetchall()
else:
rows = db.execute(
"SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?",
(MAX_API_SITES,),
).fetchall()
sites = []
for r in rows:
tags = _get_page_tags(r["id"], db)
if "private" in tags:
continue # Skip pages tagged private from sharing
sites.append({
"url": r["url"], "title": r["title"], "note": r["note"],
"tags": tags, "last_modified": r["last_modified"] or "",
})
# Include list of all current URLs so subscriber can detect deletions (limited)
all_urls = None
if not since:
all_url_rows = db.execute(
"SELECT p.id, p.url FROM pages ORDER BY id DESC LIMIT ?", (MAX_API_SITES,)
).fetchall()
all_urls = [r["url"] for r in all_url_rows if "private" not in _get_page_tags(r["id"], db)]
sites = _shared_sites(db, since=since)
all_urls = _shared_all_urls(db) if not since else None
finally:
return_db(db)
data = {"name": get_site_name(), "sites": sites}
@ -1478,10 +1596,12 @@ def _dispatch_inner(data):
return handle_bookmark(query)
elif path == "/style":
return handle_style_form()
elif path == "/share/preview":
return handle_share_preview()
elif path == "/about":
return handle_about()
elif path == "/export":
return handle_export()
return handle_export(query)
elif path == "/import":
return handle_import_form()
elif path == "/tags":