tinyweb/app.py
Derick Phan 1bc695f508
Harden network and privacy defaults; fix several bugs
Security:
- Bind HTTP gateway to 127.0.0.1 by default; add --bind for LAN opt-in
- Restrict Reticulum mesh surface to GET /api/sites only (CSRF cannot
  authenticate mesh callers, so gate by whitelist)
- Cap request body size at 16 MiB to prevent memory DoS
- Redact /bookmark query strings from request logs so the bookmark token
  and URLs do not land in stdout / docker / journal logs
- Tighten FTS5 sanitizer: strip colon, drop AND/OR/NOT/NEAR operator words
- Expand .dockerignore; document trust model in README

Features:
- Add sharing mode toggle (share everything except private vs share only
  public-tagged) with /share/preview so users can see what subscribers
  would receive before enabling sharing

Bugs:
- handle_export() crashed on every call (missing query kwarg)
- Dead float16 decompression branch in embeddings.py silently corrupted
  the HNSW index when compress_embeddings was on
- GATEWAY_PORT staleness: --port and find_available_port had no effect
  on the actual bind
- semantic_search default mismatched between db.py ("1") and the rest of
  the app ("0"), causing embeddings to be generated when the UI said off
- Connection pool returned connections with uncommitted transactions to
  the next consumer
- Gateway POST body decode 502'd on non-UTF-8 input
- ensure_rns_config clobbered user-edited ~/.reticulum/config; now only
  rewrites files it authored (sentinel-tagged)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:37:45 -07:00

294 lines
10 KiB
Python

import os
import sys
import time
import threading
import argparse
import RNS
from http.server import HTTPServer
from db import init_db, get_setting, set_setting
from handlers import dispatch_request
import gateway
from gateway import GatewayState, GatewayHandler
# Reticulum destination naming: main() passes APP_NAME and the unpacked ASPECTS
# list to RNS.Destination when registering this node.
APP_NAME = "tinyweb"
ASPECTS = ["server"]
# File name, inside DATA_DIR, where load_or_create_identity() persists the
# node's Reticulum identity.
IDENTITY_FILE = "tinyweb_identity"
# Values passed as the fallback to get_setting() for the "transport_host" and
# "transport_port" settings.
DEFAULT_TRANSPORT_HOST = "reticulum.derickphan.com"
DEFAULT_TRANSPORT_PORT = 4242
# Per-user data directory; created on demand by load_or_create_identity().
DATA_DIR = os.path.expanduser("~/.tinyweb")
def get_transport_config():
    """Return the configured transport endpoint as a ``(host, port)`` tuple.

    Both values are read through ``get_setting`` with the module-level
    defaults passed as fallbacks. The port is converted to ``int`` before it
    is returned, so a non-numeric stored value raises ``ValueError``.
    """
    fallback_port = str(DEFAULT_TRANSPORT_PORT)
    return (
        get_setting("transport_host", DEFAULT_TRANSPORT_HOST),
        int(get_setting("transport_port", fallback_port)),
    )
def find_available_port(start=8080, max_attempts=20, host="127.0.0.1"):
    """Return the first TCP port in ``[start, start + max_attempts)`` that binds.

    Each candidate is probed by binding a throw-away IPv4 socket on ``host``.
    When no candidate can be bound (or ``max_attempts`` is not positive),
    ``start`` is returned unchanged so the caller still has a port to try.
    """
    import socket

    def _can_bind(candidate):
        # Any OSError (address in use, permission denied, bad host, ...) just
        # means "this one is not usable"; the probe socket is always closed.
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.bind((host, candidate))
        except OSError:
            return False
        return True

    candidates = range(start, start + max_attempts)
    return next((p for p in candidates if _can_bind(p)), start)
def get_version():
    """Return the application version string.

    Resolution order:

    1. The most recent git tag reachable from the checkout that contains this
       file (a leading ``v`` is stripped, so ``v1.2.3`` becomes ``1.2.3``).
    2. The stripped contents of a ``VERSION`` file next to this module.
    3. The literal ``"0.0.0"``.

    ``git`` is run with ``cwd`` set to this module's directory. Without that,
    launching the program from inside some *other* git repository would
    report that repository's tag as the application version.
    """
    app_dir = os.path.dirname(os.path.abspath(__file__))
    try:
        import subprocess
        tag = subprocess.check_output(
            ["git", "describe", "--tags", "--abbrev=0"],
            cwd=app_dir,
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
        if tag.startswith("v"):
            return tag[1:]
        return tag
    except Exception:
        # Best effort: git missing, not a checkout, no tags, ... -> fall back
        # to the VERSION file shipped alongside the module.
        version_file = os.path.join(app_dir, "VERSION")
        if os.path.exists(version_file):
            with open(version_file) as f:
                return f.read().strip()
    return "0.0.0"
def load_or_create_identity():
    """Load this node's Reticulum identity, creating and persisting one if absent.

    The identity is stored at ``DATA_DIR/IDENTITY_FILE`` (``DATA_DIR`` is
    created if needed). The file is kept at mode ``0o600``: an existing file
    with looser permissions is tightened before it is read, and a new file is
    created owner-only *before* anything is written to it.

    Returns:
        Whatever ``RNS.Identity.from_file`` returns for an existing file, or
        the freshly created ``RNS.Identity`` otherwise.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    identity_path = os.path.join(DATA_DIR, IDENTITY_FILE)

    if os.path.isfile(identity_path):
        current = os.stat(identity_path).st_mode & 0o777
        if current != 0o600:
            os.chmod(identity_path, 0o600)
        return RNS.Identity.from_file(identity_path)

    identity = RNS.Identity()
    # Pre-create the file owner-only so the identity is never on disk with
    # umask-default permissions in the window between write and chmod.
    # NOTE(review): relies on to_file() writing into the existing path (which
    # keeps its mode) — confirm against the RNS implementation. If it does
    # not, this step is harmless and the chmod below still applies.
    try:
        fd = os.open(identity_path, os.O_WRONLY | os.O_CREAT, 0o600)
    except OSError:
        pass  # best effort; fall back to the original write-then-chmod path
    else:
        os.close(fd)
    identity.to_file(identity_path)
    os.chmod(identity_path, 0o600)
    return identity
# Remote peers on the Reticulum mesh can only reach a narrow, read-only surface.
# Any other method/path is rejected here — CSRF cannot authenticate mesh callers
# (the attacker controls both the "cookie" and the "form" side of the check), so
# gating by whitelist is the only safe option.
# Entries are exact (method, path) pairs: rns_request_handler() performs a plain
# membership test, so there is no prefix or case-insensitive matching.
_RNS_ALLOWED = {("GET", "/api/sites")}
def rns_request_handler(path, data, request_id, link_id, remote_identity, requested_at):
    """Answer a request arriving over Reticulum, enforcing the mesh whitelist.

    Registered in ``main()`` as the response generator for ``/tinyweb``. Only
    ``data`` is inspected; the remaining parameters are accepted because the
    handler is registered with this signature and are otherwise unused.

    ``data`` comes from a remote, unauthenticated peer, so it is validated
    before use: anything that is not a dict, or whose ``method`` / ``path``
    are not strings, is rejected with the same 403 response as a request for
    a non-whitelisted endpoint (previously such payloads raised
    ``AttributeError`` / ``TypeError`` inside the handler).

    Args:
        data: Request dict with at least ``method`` and ``path`` keys, or
            ``None`` (treated as ``GET /``).

    Returns:
        The result of ``dispatch_request(data)`` for whitelisted requests,
        otherwise a 403 response dict.
    """
    if data is None:
        data = {"method": "GET", "path": "/", "query": {}, "body": {}, "gateway_host": ""}

    forbidden = {
        "status": 403,
        "content_type": "text/plain; charset=utf-8",
        "body": "Forbidden: this endpoint is not available over Reticulum.",
        "headers": {},
    }

    if not isinstance(data, dict):
        return forbidden

    method = data.get("method", "GET")
    req_path = data.get("path", "/")
    # Non-string values (lists, dicts, ...) may be unhashable and would blow
    # up the set lookup below; they can never match the whitelist anyway.
    if not (isinstance(method, str) and isinstance(req_path, str)):
        return forbidden

    if (method, req_path) not in _RNS_ALLOWED:
        return forbidden
    return dispatch_request(data)
def start_gateway(reticulum, bind_host="127.0.0.1"):
    """Start the local HTTP gateway on a daemon thread.

    Publishes the Reticulum instance and the local dispatcher on
    ``GatewayState``, then serves ``GatewayHandler`` on
    ``(bind_host, gateway.GATEWAY_PORT)``. Returns nothing; the serving
    thread is a daemon thread.
    """
    GatewayState.reticulum = reticulum
    GatewayState.local_dispatch = dispatch_request

    listen_address = (bind_host, gateway.GATEWAY_PORT)
    httpd = HTTPServer(listen_address, GatewayHandler)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
def _config_settings_match(config_file, desired_host, desired_port):
    """Check if existing config transport and LoRa settings match desired values.

    The Reticulum config declares interfaces as nested ``[[Name]]``
    subsections under ``[interfaces]``. ``configparser`` does not understand
    nested sections (it reads ``[[TCP Transport]]`` as a section whose name
    still contains a bracket), so a lookup for ``"TCP Transport"`` can never
    succeed with it. The file is therefore scanned with a small line parser
    that collects the ``key = value`` pairs of each ``[[...]]`` subsection.

    Args:
        config_file: Path of the Reticulum config to inspect.
        desired_host: Expected ``target_host`` of the ``TCP Transport`` interface.
        desired_port: Expected ``target_port`` (compared as a string).

    Returns:
        True when the presence of the ``TCP Transport`` / ``RNode LoRa``
        interfaces and their compared fields agree with the current settings;
        False otherwise, including when the file cannot be read.
    """
    try:
        with open(config_file) as f:
            lines = f.read().splitlines()
    except (OSError, UnicodeDecodeError):
        # Unreadable config: a match cannot be confirmed.
        return False

    # Map interface name -> {key: value} for every [[...]] under [interfaces].
    interfaces = {}
    top_section = None
    current = None
    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("[") and line.endswith("]"):
            depth = len(line) - len(line.lstrip("["))
            name = line.strip("[]").strip()
            if depth == 1:
                top_section = name
                current = None
            elif depth == 2 and top_section == "interfaces":
                current = interfaces.setdefault(name, {})
            else:
                current = None
            continue
        if current is not None and "=" in line:
            key, _, value = line.partition("=")
            current[key.strip()] = value.strip()

    # Check TCP transport
    tcp_enabled = get_setting("tcp_enabled", "1") == "1"
    tcp = interfaces.get("TCP Transport")
    if tcp_enabled != (tcp is not None):
        return False
    if tcp is not None:
        if (tcp.get("target_host") != desired_host or
                tcp.get("target_port") != str(desired_port)):
            return False

    # Check LoRa. ensure_rns_config() only emits the LoRa block when a port
    # is configured, so "enabled without a port" means no block is expected.
    lora_port = get_setting("lora_port", "")
    lora_wanted = get_setting("lora_enabled", "0") == "1" and bool(lora_port)
    lora = interfaces.get("RNode LoRa")
    if lora_wanted != (lora is not None):
        return False
    if lora is not None:
        if lora.get("port", "") != lora_port:
            return False
        if lora.get("frequency", "") != get_setting("lora_frequency", "867200000"):
            return False
    return True
def ensure_rns_config(config_dir, transport_host=None, transport_port=None):
    """Write the tinyweb-managed Reticulum config, leaving user-authored files alone.

    Behavior by case:

    * No config file exists: a new one is written.
    * A config exists without the "managed by tinyweb" sentinel: it is never
      modified; a warning is printed when ``_config_settings_match`` reports
      that it differs from the current settings.
    * A managed config exists: it is rewritten unless
      ``_config_settings_match`` reports that it already matches.

    Args:
        config_dir: Directory holding the ``config`` file; ``None`` means
            ``~/.reticulum``.
        transport_host: TCP transport host; ``None`` reads the
            ``transport_host`` setting.
        transport_port: TCP transport port; ``None`` reads the
            ``transport_port`` setting and converts it to ``int``.
    """
    if config_dir is None:
        config_dir = os.path.expanduser("~/.reticulum")
    config_file = os.path.join(config_dir, "config")
    if transport_host is None:
        transport_host = get_setting("transport_host", DEFAULT_TRANSPORT_HOST)
    if transport_port is None:
        transport_port = int(get_setting("transport_port", str(DEFAULT_TRANSPORT_PORT)))
    # First line of every file written below; its presence anywhere in an
    # existing file marks that file as tinyweb-authored.
    managed_sentinel = "# managed by tinyweb"
    if os.path.exists(config_file):
        try:
            with open(config_file) as f:
                existing = f.read()
        except OSError:
            # An unreadable file shows no sentinel, so it is handled like a
            # user-authored config below (never overwritten).
            existing = ""
        if managed_sentinel not in existing:
            # User-authored config — don't clobber it.
            if not _config_settings_match(config_file, transport_host, transport_port):
                print(
                    f"Warning: {config_file} was not created by tinyweb; "
                    "leaving it alone. Edit it manually to change transport/LoRa settings."
                )
            return
        # Managed file that already reflects the current settings: nothing to do.
        if _config_settings_match(config_file, transport_host, transport_port):
            return
    # Build optional interface blocks
    tcp_block = ""
    if get_setting("tcp_enabled", "1") == "1":
        tcp_block = f"""
[[TCP Transport]]
type = TCPClientInterface
enabled = yes
target_host = {transport_host}
target_port = {transport_port}
"""
    lora_block = ""
    if get_setting("lora_enabled", "0") == "1":
        lora_port = get_setting("lora_port", "")
        # The LoRa block is only emitted when a port has been configured.
        if lora_port:
            lora_frequency = get_setting("lora_frequency", "867200000")
            lora_bandwidth = get_setting("lora_bandwidth", "125000")
            lora_txpower = get_setting("lora_txpower", "7")
            lora_sf = get_setting("lora_sf", "8")
            lora_cr = get_setting("lora_cr", "5")
            lora_block = f"""
[[RNode LoRa]]
type = RNodeInterface
enabled = yes
port = {lora_port}
frequency = {lora_frequency}
bandwidth = {lora_bandwidth}
txpower = {lora_txpower}
spreadingfactor = {lora_sf}
codingrate = {lora_cr}
"""
    os.makedirs(config_dir, exist_ok=True)
    # The file is opened in "w" mode, so a previous managed config is replaced wholesale.
    with open(config_file, "w") as f:
        f.write(f"""{managed_sentinel}
[reticulum]
enable_transport = False
share_instance = No
[logging]
loglevel = 4
[interfaces]
[[Default Interface]]
type = AutoInterface
enabled = Yes
{tcp_block}{lora_block}""")
    print(f"Created Reticulum config at {config_file}")
def _preload_embeddings():
    """Warm up semantic search: load the embedding session and build the index.

    Does nothing beyond printing a notice unless the ``semantic_search``
    setting is ``"1"``. The reranker is loaded as well when ``use_reranker``
    is ``"1"``. Any failure during warm-up is reported on stdout and
    swallowed.
    """
    semantic_enabled = get_setting("semantic_search", "0") == "1"
    if not semantic_enabled:
        print("Semantic search disabled.")
        return

    try:
        from embeddings import _get_session, _get_reranker, build_index

        _get_session()
        build_index()

        with_reranker = get_setting("use_reranker", "0") == "1"
        if with_reranker:
            _get_reranker()
        ready_message = (
            "Semantic search ready (with reranker)."
            if with_reranker
            else "Semantic search ready."
        )
        print(ready_message)
    except Exception as e:
        print(f"Semantic search unavailable: {e}")
def _is_loopback_host(host):
    """Return True if ``host`` is ``localhost`` or a loopback IP literal."""
    import ipaddress

    if host.lower() == "localhost":
        return True
    try:
        return ipaddress.ip_address(host).is_loopback
    except ValueError:
        # Not an IP literal (e.g. a hostname): cannot prove it is local-only,
        # so err on the side of warning.
        return False


def main():
    """Command-line entry point: start the Reticulum destination and HTTP gateway.

    Parses ``--version``, ``--port`` and ``--bind``; picks a gateway port,
    initialises the database, kicks off embedding warm-up on a daemon thread,
    ensures the Reticulum config, registers the ``/tinyweb`` request handler,
    announces the destination, starts the gateway and then idles forever.

    A "no authentication" warning is printed for every bind address that is
    not loopback — not only for the ``0.0.0.0`` / ``::`` wildcards — because
    binding to a specific LAN address exposes the web UI just the same.
    """
    parser = argparse.ArgumentParser(prog="tinyweb", description="Personal decentralized search engine")
    parser.add_argument("--version", "-v", action="store_true", help="Show version")
    parser.add_argument("--port", "-p", type=int, default=None, help="HTTP gateway port (default: 8080)")
    parser.add_argument(
        "--bind", "-b", default="127.0.0.1",
        help="Address to bind the HTTP gateway to (default: 127.0.0.1). "
             "Use 0.0.0.0 to expose to the LAN; note that the web UI has no authentication.",
    )
    args = parser.parse_args()
    if args.version:
        print(f"TinyWeb {get_version()}")
        return

    bind_host = args.bind
    port = args.port or 8080
    # Stored on the gateway module because start_gateway() reads the port from there.
    gateway.GATEWAY_PORT = find_available_port(port, host=bind_host)

    init_db()
    transport_host, transport_port = get_transport_config()
    threading.Thread(target=_preload_embeddings, daemon=True).start()

    config_dir = os.environ.get("RNS_CONFIG_DIR")
    ensure_rns_config(config_dir, transport_host, transport_port)
    reticulum = RNS.Reticulum(configdir=config_dir)
    identity = load_or_create_identity()
    destination = RNS.Destination(
        identity,
        RNS.Destination.IN,
        RNS.Destination.SINGLE,
        APP_NAME,
        *ASPECTS,
    )
    destination.register_request_handler(
        "/tinyweb",
        response_generator=rns_request_handler,
        allow=RNS.Destination.ALLOW_ALL,
    )
    # Brief delay to ensure all interfaces (especially TCP) are fully ready
    time.sleep(2)
    destination.announce()
    set_setting("dest_hash", destination.hash.hex())

    start_gateway(reticulum, bind_host=bind_host)
    print("TinyWeb running!")
    # A wildcard address is not something a browser can open; point at localhost.
    wildcard = bind_host in ("0.0.0.0", "::")
    shown_host = "localhost" if wildcard else bind_host
    print(f"Open http://{shown_host}:{gateway.GATEWAY_PORT} in your browser")
    if wildcard or not _is_loopback_host(bind_host):
        print(f"WARNING: listening on {bind_host} — the web UI has no authentication. "
              "Anyone on your network can control this instance.")
    print(f"Destination hash: {RNS.prettyhexrep(destination.hash)} (share this so friends can subscribe)")

    # The gateway thread is a daemon thread, so keep the main thread alive.
    while True:
        time.sleep(1)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()