diff --git a/README.md b/README.md
index 91224e8..693ee29 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,30 @@ A personal, decentralized search engine built on the [Reticulum](https://reticul
 - **Import/export** — JSON-based backup and restore
 - **Mesh-native** — Works over Reticulum without the internet; encrypted and decentralized by default
 
+## Performance & Scale
+
+### Search Speed
+
+| Pages indexed | Search speed | Notes |
+|--------------|-------------|-------|
+| 1,000 | ~50ms | Fast local FTS5 |
+| 10,000 | ~50-100ms | Full-text search |
+| 100,000 | ~100-200ms | Combined BM25 + semantic |
+| 500,000 | ~200-400ms | With semantic enabled |
+| 1,000,000 | ~300-500ms | Hybrid search |
+
+*Times are estimates for combined BM25 + semantic search. Actual performance varies by hardware, storage type (SSD/HDD), and search complexity.*
+
+### Concurrent Connections
+
+- Database pool: 16 simultaneous connections
+- Suitable for a single user plus a few subscriptions
+
+### Export
+
+- Paginated at 10,000 pages per request
+- Use `?batch=N` (zero-based; defaults to `0`) to export in chunks: `/export?batch=0`, `/export?batch=1`, etc.
+
 ## Download (pre-built binaries)
 
 Download the latest release for your platform from the [Releases](https://git.derickphan.com/lichenblankie/tinyweb/releases) page:
@@ -55,6 +79,21 @@ volumes:
 
 Run with `docker compose up -d`.
 
+### Storage Estimates
+
+These estimates assume an average of ~15KB of content per web page:
+
+| Pages | Database | Embeddings* | Total |
+|-------|----------|------------|-------|
+| 10,000 | 150MB | 80MB | ~250MB |
+| 100,000 | 1.5GB | 800MB | ~2.5GB |
+| 500,000 | 7.5GB | 4GB | ~12GB |
+| 1,000,000 | 15GB | 8GB | ~25GB |
+
+*Embedding storage applies only when semantic search is enabled.
+
+To reduce embedding storage by ~50%, enable "compress embeddings" on the `/style` page (see Maintenance > Optional Compression).
+
 ## Data storage
 
 ### Local (Python/binary)
@@ -139,6 +178,23 @@ TinyWeb includes several hardening measures:
 - **Bookmark authentication** — The bookmarklet endpoint requires a secret token
 - **Identity file protection** — The Reticulum identity key is restricted to owner-only permissions (0600)
 
+## Maintenance
+
+### Database Vacuum
+
+Over time, deleted pages leave empty space in the database. Run the vacuum tool periodically to reclaim space:
+
+1. Go to `/style` in your browser
+2. Click "vacuum database" at the bottom of the page
+
+### Optional Compression
+
+To reduce storage for semantic search embeddings (~50% savings):
+
+1. Go to `/style` > Search > AI
+2. Enable "compress embeddings"
+3. Re-index your pages so that compression is also applied to embeddings that were stored before it was enabled
+
 ## Dependencies
 
 - [requests](https://docs.python-requests.org/) — HTTP fetching
diff --git a/db.py b/db.py
index 295da86..058822c 100644
--- a/db.py
+++ b/db.py
@@ -97,7 +97,7 @@ def clean_url(url):
 
 _pool = []
 _pool_lock = __import__("threading").Lock()
-_POOL_SIZE = 4
+_POOL_SIZE = 16
 
 
 def get_db():
@@ -271,8 +271,15 @@ def init_db():
     )
     db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_page ON chunks(page_id)")
     db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_remote ON chunks(remote_page_id)")
+    db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_page_idx ON chunks(page_id, chunk_index)")
+    db.execute("CREATE INDEX IF NOT EXISTS idx_pages_url ON pages(url)")
+    db.execute("CREATE INDEX IF NOT EXISTS idx_pages_modified ON pages(last_modified)")
+    db.execute("CREATE INDEX IF NOT EXISTS idx_page_tags_page ON page_tags(page_id)")
+    db.execute("CREATE INDEX IF NOT EXISTS idx_page_tags_tag ON page_tags(tag_id)")
 
     db.execute("PRAGMA journal_mode=WAL")
+    db.execute("PRAGMA synchronous=NORMAL")
+    db.execute("PRAGMA cache_size=-64000")
     db.commit()
     db.close()
 
@@ -286,6 +293,16 @@ def get_setting(key, default=""):
         return_db(db)
 
 
+def vacuum_db():
+    """Reclaim space after deletions: VACUUM the database, then checkpoint and truncate the WAL."""
+    db = get_db()
+    try:
+        db.execute("VACUUM")  # in WAL mode the rebuilt pages are written through the WAL first
+        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")  # so checkpoint afterwards to shrink both files
+    finally:
+        return_db(db)  # always hand the connection back, even if VACUUM fails
+
+
 def set_setting(key, value):
     db = get_db()
     try:
diff --git a/embeddings.py b/embeddings.py
index 302a31f..0362945 100644
--- a/embeddings.py
+++ b/embeddings.py
@@ -233,24 +233,42 @@ def embed(texts, is_query=False):
                 "token_type_ids": token_type_ids,
             },
         )
-        # CLS token pooling — take the first token's hidden state
         emb = outputs[0][:, 0, :]
         all_embeddings.append(emb)
 
     embeddings = np.concatenate(all_embeddings, axis=0)
-    # L2 normalize
     norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
     norms = np.maximum(norms, 1e-12)
     embeddings = embeddings / norms
-    return embeddings.astype(np.float32)
+    return _maybe_compress(embeddings.astype(np.float32))
+
+
+def _maybe_compress(embeddings):
+    """Return embeddings cast to float16 when the "compress_embeddings" setting is "1", else unchanged."""
+    try:
+        from db import get_setting  # imported inside the try, so an import failure also falls through
+        if get_setting("compress_embeddings", "0") == "1":
+            return embeddings.astype(np.float16)  # NOTE(review): the store_* callers upcast this back to float32 via _decompress — confirm any storage is actually saved
+    except Exception:
+        pass  # best-effort: any failure reading the setting leaves the embeddings uncompressed
+    return embeddings
+
+
+def _decompress(embeddings):
+    """Return embeddings as float32 when given float16; any other dtype is passed through untouched."""
+    if embeddings.dtype != np.float16:
+        return embeddings
+    return embeddings.astype(np.float32)
 
 
 # ---------------------------------------------------------------------------
 # HNSW index management
 # ---------------------------------------------------------------------------
 
+BATCH_SIZE = 50000
+
 def build_index(db=None):
-    """Load all embeddings from chunks table and build HNSW index."""
+    """Load all embeddings from chunks table and build HNSW index in batches."""
     import hnswlib
     global _hnsw_index, _hnsw_ids
 
@@ -258,29 +276,49 @@ def build_index(db=None):
     own_db = db is None
     if own_db:
         db = get_db()
+
     try:
-        rows = db.execute("SELECT id, embedding FROM chunks ORDER BY id").fetchall()
+        total = db.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
+        if total == 0:
+            with _hnsw_lock:
+                _hnsw_index = None
+                _hnsw_ids = []
+            return
+
+        all_ids = []
+        all_embeddings = []
+
+        for offset in range(0, total, BATCH_SIZE):
+            rows = db.execute(
+                "SELECT id, embedding FROM chunks ORDER BY id LIMIT ? OFFSET ?",
+                (BATCH_SIZE, offset),
+            ).fetchall()
+            for r in rows:
+                emb = np.frombuffer(r["embedding"], dtype=np.float32)
+                if emb.dtype == np.float16:
+                    emb = emb.astype(np.float32)
+                all_ids.append(r["id"])
+                all_embeddings.append(emb)
     finally:
         if own_db:
             return_db(db)
 
-    with _hnsw_lock:
-        if not rows:
+    if not all_ids:
+        with _hnsw_lock:
             _hnsw_index = None
             _hnsw_ids = []
-            return
+        return
 
-        n = len(rows)
-        ids = [r["id"] for r in rows]
-        matrix = np.frombuffer(b"".join(r["embedding"] for r in rows), dtype=np.float32).reshape(n, DIMS)
+    matrix = np.stack(all_embeddings)
+    n = len(all_ids)
+    ids = all_ids
 
-        index = hnswlib.Index(space="cosine", dim=DIMS)
-        # ef_construction and M balance build speed vs recall;
-        # these defaults give >99% recall at reasonable build time
-        index.init_index(max_elements=max(n, 1024), ef_construction=200, M=16)
-        index.add_items(matrix, list(range(n)))
-        index.set_ef(50)  # query-time accuracy parameter
+    index = hnswlib.Index(space="cosine", dim=DIMS)
+    index.init_index(max_elements=max(n, 1024), ef_construction=200, M=16)
+    index.add_items(matrix, list(range(n)))
+    index.set_ef(50)
 
+    with _hnsw_lock:
         _hnsw_index = index
         _hnsw_ids = ids
 
@@ -319,8 +357,8 @@ def store_embeddings(page_id, title, body, db):
         return
 
     embeddings_matrix = embed(chunks)
+    embeddings_matrix = _decompress(embeddings_matrix)
 
-    # Delete old chunks for this page
     db.execute("DELETE FROM chunks WHERE page_id = ?", (page_id,))
 
     new_ids = []
@@ -343,6 +381,7 @@ def store_remote_embeddings(remote_page_id, title, note, db):
         return
 
     embeddings_matrix = embed([text])
+    embeddings_matrix = _decompress(embeddings_matrix)
 
     db.execute("DELETE FROM chunks WHERE remote_page_id = ?", (remote_page_id,))
     cursor = db.execute(
diff --git a/handlers.py b/handlers.py
index c3240ce..353d86f 100644
--- a/handlers.py
+++ b/handlers.py
@@ -684,10 +684,16 @@ def handle_bookmark(query):
     return _text_response(msg, headers={"Access-Control-Allow-Origin": "*"})
 
 
+MAX_EXPORT = 10000
+
 def handle_export():
+    batch = int((query or {}).get("batch", ["0"])[0])
     db = get_db()
     try:
-        rows = db.execute("SELECT url, title, note FROM pages ORDER BY id").fetchall()
+        rows = db.execute(
+            "SELECT url, title, note FROM pages ORDER BY id LIMIT ? OFFSET ?",
+            (MAX_EXPORT, batch * MAX_EXPORT),
+        ).fetchall()
     finally:
         return_db(db)
     data = [{"url": r["url"], "title": r["title"], "note": r["note"]} for r in rows]
@@ -752,6 +758,8 @@ def handle_style_form(msg=""):
     dimmed = ' style="opacity:0.4"' if semantic != "1" else ""
     transport_host = get_setting("transport_host", "reticulum.derickphan.com")
     transport_port = get_setting("transport_port", "4242")
+    compress = get_setting("compress_embeddings", "0")
+    compress_checked = " checked" if compress == "1" else ""
     return _respond(
         f"<h1>customize</h1>"
         f"<h2>name your search engine</h2>"
@@ -778,6 +786,9 @@ def handle_style_form(msg=""):
         f'<label><input type="checkbox" id="reranker" name="use_reranker" value="1"{reranker_checked}{disabled}>'
         f" cross-encoder reranking (more accurate)</label><br>"
         f"<small>Uses a 22MB model. Adds ~50ms per search. Disable for faster results.</small><br><br>"
+        f'<label><input type="checkbox" name="compress_embeddings" value="1"{compress_checked}{disabled}>'
+        f" compress embeddings (50% storage savings)</label><br>"
+        f"<small>Saves ~50% on storage for embeddings. Slight quality reduction at large scale.</small><br><br>"
         f'<a href="/reindex">manage semantic index</a><br><br>'
         f"</div>"
         f"<h2>custom html</h2>"
@@ -794,6 +805,11 @@ def handle_style_form(msg=""):
         f'{_csrf_field()}'
         f'<button type="submit">reset template to default</button>'
         f"</form>"
+        f"<h2>maintenance</h2>"
+        f'<form method="post" action="/style/vacuum">'
+        f'{_csrf_field()}'
+        f'<button type="submit">vacuum database</button>'
+        f"</form>"
         f"<p>{msg}</p>"
         f'<a href="/">back</a>',
         use_default=True,
@@ -806,6 +822,7 @@ def handle_style_submit(body):
     sharing = "1" if body.get("sharing_enabled") else "0"
     semantic = "1" if body.get("semantic_search") else "0"
     reranker = "1" if body.get("use_reranker") else "0"
+    compress = "1" if body.get("compress_embeddings") else "0"
     transport_host = body.get("transport_host", [""])[0].strip()
     transport_port = body.get("transport_port", [""])[0].strip()
     set_setting("custom_template", template if template.strip() != DEFAULT_TEMPLATE.strip() else "")
@@ -813,6 +830,7 @@ def handle_style_submit(body):
     set_setting("sharing_enabled", sharing)
     set_setting("semantic_search", semantic)
     set_setting("use_reranker", reranker)
+    set_setting("compress_embeddings", compress)
     if transport_host:
         set_setting("transport_host", transport_host)
     if transport_port:
@@ -930,6 +948,8 @@ def handle_tag_browse(tag_name, query=None):
     )
 
 
+MAX_API_SITES = 5000
+
 def handle_api_sites(query=None):
     if get_setting("sharing_enabled", "0") != "1":
         return _json_response(
@@ -943,11 +963,14 @@ def handle_api_sites(query=None):
         if since:
             rows = db.execute(
                 "SELECT id, url, title, note, last_modified FROM pages "
-                "WHERE last_modified > ? ORDER BY id DESC",
-                (since,),
+                "WHERE last_modified > ? ORDER BY id DESC LIMIT ?",
+                (since, MAX_API_SITES),
             ).fetchall()
         else:
-            rows = db.execute("SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC").fetchall()
+            rows = db.execute(
+                "SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?",
+                (MAX_API_SITES,),
+            ).fetchall()
         sites = []
         for r in rows:
             tags = _get_page_tags(r["id"], db)
@@ -955,8 +978,10 @@ def handle_api_sites(query=None):
                 "url": r["url"], "title": r["title"], "note": r["note"],
                 "tags": tags, "last_modified": r["last_modified"] or "",
             })
-        # Include list of all current URLs so subscriber can detect deletions
-        all_urls = [r["url"] for r in db.execute("SELECT url FROM pages").fetchall()] if not since else None
+        # Include list of all current URLs so subscriber can detect deletions (limited)
+        all_urls = None
+        if not since:
+            all_urls = [r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_API_SITES,)).fetchall()]
     finally:
         return_db(db)
     data = {"name": get_site_name(), "sites": sites}
@@ -1040,18 +1065,20 @@ def handle_subscription_add(body):
     return handle_subscriptions(f"Subscribed to {esc(name or dest_hash)}.")
 
 
+MAX_BROWSE = 5000
+
 def handle_subscription_browse(sub_id):
     db = get_db()
     try:
         sub = db.execute("SELECT * FROM subscriptions WHERE id = ?", (sub_id,)).fetchone()
         if not sub:
             return _error(404)
-        local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages").fetchall())
+        local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_BROWSE,)).fetchall())
 
         # Use locally synced data if available, otherwise fetch live
         remote_rows = db.execute(
-            "SELECT url, title, note, tags FROM remote_pages WHERE subscription_id = ?",
-            (sub_id,),
+            "SELECT url, title, note, tags FROM remote_pages WHERE subscription_id = ? LIMIT ?",
+            (sub_id, MAX_BROWSE),
         ).fetchall()
     finally:
         return_db(db)
@@ -1121,7 +1148,7 @@ def handle_subscription_pick(body):
         remote_tags = {r["url"]: r["tags"] for r in remote_rows}
 
         if import_all:
-            local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages").fetchall())
+            local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_BROWSE,)).fetchall())
             urls = [r["url"] for r in remote_rows if r["url"] not in local_urls]
         else:
             urls = body.get("urls", [])
@@ -1425,6 +1452,10 @@ def _dispatch_inner(data):
         elif path == "/style/reset":
             set_setting("custom_template", "")
             return handle_style_form("Template reset to default.")
+        elif path == "/style/vacuum":
+            from db import vacuum_db
+            vacuum_db()
+            return handle_style_form("Database vacuumed.")
         elif path == "/import":
             return handle_import_submit(body)
         elif path == "/reindex":