This commit is contained in:
parent
552311b730
commit
8ecb963be4
4 changed files with 172 additions and 29 deletions
56
README.md
56
README.md
|
|
@ -12,6 +12,30 @@ A personal, decentralized search engine built on the [Reticulum](https://reticul
|
||||||
- **Import/export** — JSON-based backup and restore
|
- **Import/export** — JSON-based backup and restore
|
||||||
- **Mesh-native** — Works over Reticulum without the internet; encrypted and decentralized by default
|
- **Mesh-native** — Works over Reticulum without the internet; encrypted and decentralized by default
|
||||||
|
|
||||||
|
## Performance & Scale
|
||||||
|
|
||||||
|
### Search Speed
|
||||||
|
|
||||||
|
| Pages indexed | Search speed | Notes |
|
||||||
|
|--------------|-------------|-------|
|
||||||
|
| 1,000 | ~50ms | Fast local FTS5 |
|
||||||
|
| 10,000 | ~50-100ms | Full-text search |
|
||||||
|
| 100,000 | ~100-200ms | Combined BM25 + semantic |
|
||||||
|
| 500,000 | ~200-400ms | With semantic enabled |
|
||||||
|
| 1,000,000 | ~300-500ms | Hybrid search |
|
||||||
|
|
||||||
|
*Times are estimates for combined BM25 + semantic search. Actual performance varies by hardware, storage type (SSD/HDD), and search complexity.*
|
||||||
|
|
||||||
|
### Concurrent Connections
|
||||||
|
|
||||||
|
- Database pool: 16 simultaneous connections
|
||||||
|
- Suitable for a single user plus a few subscriptions
|
||||||
|
|
||||||
|
### Export
|
||||||
|
|
||||||
|
- Paginated at 10,000 pages per request
|
||||||
|
- Use `?batch=N` to export in chunks: `/export?batch=0`, `/export?batch=1`, etc.
|
||||||
|
|
||||||
## Download (pre-built binaries)
|
## Download (pre-built binaries)
|
||||||
|
|
||||||
Download the latest release for your platform from the [Releases](https://git.derickphan.com/lichenblankie/tinyweb/releases) page:
|
Download the latest release for your platform from the [Releases](https://git.derickphan.com/lichenblankie/tinyweb/releases) page:
|
||||||
|
|
@ -55,6 +79,21 @@ volumes:
|
||||||
|
|
||||||
Run with `docker compose up -d`.
|
Run with `docker compose up -d`.
|
||||||
|
|
||||||
|
### Storage Estimates
|
||||||
|
|
||||||
|
Average web page content is ~15KB per page:
|
||||||
|
|
||||||
|
| Pages | Database | Embeddings* | Total |
|
||||||
|
|-------|----------|------------|-------|
|
||||||
|
| 10,000 | 150MB | 80MB | ~250MB |
|
||||||
|
| 100,000 | 1.5GB | 800MB | ~2.5GB |
|
||||||
|
| 500,000 | 7.5GB | 4GB | ~12GB |
|
||||||
|
| 1,000,000 | 15GB | 8GB | ~25GB |
|
||||||
|
|
||||||
|
\*Embeddings are only stored when semantic search is enabled. With compression enabled (`/style` > Search > AI), embeddings use ~50% less storage.
|
||||||
|
|
||||||
|
Compression is optional; see "Optional Compression" under Maintenance for the steps to enable it and reduce embedding storage by ~50%.
|
||||||
|
|
||||||
## Data storage
|
## Data storage
|
||||||
|
|
||||||
### Local (Python/binary)
|
### Local (Python/binary)
|
||||||
|
|
@ -139,6 +178,23 @@ TinyWeb includes several hardening measures:
|
||||||
- **Bookmark authentication** — The bookmarklet endpoint requires a secret token
|
- **Bookmark authentication** — The bookmarklet endpoint requires a secret token
|
||||||
- **Identity file protection** — The Reticulum identity key is restricted to owner-only permissions (0600)
|
- **Identity file protection** — The Reticulum identity key is restricted to owner-only permissions (0600)
|
||||||
|
|
||||||
|
## Maintenance
|
||||||
|
|
||||||
|
### Database Vacuum
|
||||||
|
|
||||||
|
Over time, deleted pages leave empty space in the database. Run the vacuum tool periodically to reclaim space:
|
||||||
|
|
||||||
|
1. Go to `/style` in your browser
|
||||||
|
2. Click "vacuum database" at the bottom of the page
|
||||||
|
|
||||||
|
### Optional Compression
|
||||||
|
|
||||||
|
To reduce storage for semantic search embeddings (~50% savings):
|
||||||
|
|
||||||
|
1. Go to `/style` > Search > AI
|
||||||
|
2. Enable "compress embeddings"
|
||||||
|
3. Re-index your pages so that compression is also applied to embeddings that were stored before you enabled it
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
- [requests](https://docs.python-requests.org/) — HTTP fetching
|
- [requests](https://docs.python-requests.org/) — HTTP fetching
|
||||||
|
|
|
||||||
19
db.py
19
db.py
|
|
@ -97,7 +97,7 @@ def clean_url(url):
|
||||||
|
|
||||||
_pool = []
|
_pool = []
|
||||||
_pool_lock = __import__("threading").Lock()
|
_pool_lock = __import__("threading").Lock()
|
||||||
_POOL_SIZE = 4
|
_POOL_SIZE = 16
|
||||||
|
|
||||||
|
|
||||||
def get_db():
|
def get_db():
|
||||||
|
|
@ -271,8 +271,15 @@ def init_db():
|
||||||
)
|
)
|
||||||
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_page ON chunks(page_id)")
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_page ON chunks(page_id)")
|
||||||
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_remote ON chunks(remote_page_id)")
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_remote ON chunks(remote_page_id)")
|
||||||
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_page_idx ON chunks(page_id, chunk_index)")
|
||||||
|
db.execute("CREATE INDEX IF NOT EXISTS idx_pages_url ON pages(url)")
|
||||||
|
db.execute("CREATE INDEX IF NOT EXISTS idx_pages_modified ON pages(last_modified)")
|
||||||
|
db.execute("CREATE INDEX IF NOT EXISTS idx_page_tags_page ON page_tags(page_id)")
|
||||||
|
db.execute("CREATE INDEX IF NOT EXISTS idx_page_tags_tag ON page_tags(tag_id)")
|
||||||
|
|
||||||
db.execute("PRAGMA journal_mode=WAL")
|
db.execute("PRAGMA journal_mode=WAL")
|
||||||
|
db.execute("PRAGMA synchronous=NORMAL")
|
||||||
|
db.execute("PRAGMA cache_size=-64000")
|
||||||
db.commit()
|
db.commit()
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|
@ -286,6 +293,16 @@ def get_setting(key, default=""):
|
||||||
return_db(db)
|
return_db(db)
|
||||||
|
|
||||||
|
|
||||||
|
def vacuum_db():
    """Reclaim disk space left behind by deleted rows.

    Checkpoints the write-ahead log, rebuilds the database file with
    ``VACUUM``, then checkpoints the log again. The pooled connection is
    always handed back through ``return_db``, even if a statement fails.

    Raises:
        Whatever the underlying connection raises when a statement fails
        (for example if the database is locked by another connection).
    """
    db = get_db()
    try:
        # Fold any pending WAL frames into the main file before rebuilding it.
        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        db.execute("VACUUM")
        # The database runs in WAL mode, so the rebuilt pages are written
        # through the log. Truncate it again; otherwise the reclaimed space
        # simply moves into the -wal file and stays on disk.
        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")
    finally:
        return_db(db)
|
||||||
|
|
||||||
|
|
||||||
def set_setting(key, value):
|
def set_setting(key, value):
|
||||||
db = get_db()
|
db = get_db()
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -233,24 +233,42 @@ def embed(texts, is_query=False):
|
||||||
"token_type_ids": token_type_ids,
|
"token_type_ids": token_type_ids,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
# CLS token pooling — take the first token's hidden state
|
|
||||||
emb = outputs[0][:, 0, :]
|
emb = outputs[0][:, 0, :]
|
||||||
all_embeddings.append(emb)
|
all_embeddings.append(emb)
|
||||||
|
|
||||||
embeddings = np.concatenate(all_embeddings, axis=0)
|
embeddings = np.concatenate(all_embeddings, axis=0)
|
||||||
# L2 normalize
|
|
||||||
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
|
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
|
||||||
norms = np.maximum(norms, 1e-12)
|
norms = np.maximum(norms, 1e-12)
|
||||||
embeddings = embeddings / norms
|
embeddings = embeddings / norms
|
||||||
return embeddings.astype(np.float32)
|
return _maybe_compress(embeddings.astype(np.float32))
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_compress(embeddings):
|
||||||
|
"""Compress embeddings to float16 if compression is enabled."""
|
||||||
|
try:
|
||||||
|
from db import get_setting
|
||||||
|
if get_setting("compress_embeddings", "0") == "1":
|
||||||
|
return embeddings.astype(np.float16)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return embeddings
|
||||||
|
|
||||||
|
|
||||||
|
def _decompress(embeddings):
|
||||||
|
"""Decompress float16 embeddings to float32 if needed."""
|
||||||
|
if embeddings.dtype == np.float16:
|
||||||
|
return embeddings.astype(np.float32)
|
||||||
|
return embeddings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# HNSW index management
|
# HNSW index management
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
BATCH_SIZE = 50000
|
||||||
|
|
||||||
def build_index(db=None):
|
def build_index(db=None):
|
||||||
"""Load all embeddings from chunks table and build HNSW index."""
|
"""Load all embeddings from chunks table and build HNSW index in batches."""
|
||||||
import hnswlib
|
import hnswlib
|
||||||
global _hnsw_index, _hnsw_ids
|
global _hnsw_index, _hnsw_ids
|
||||||
|
|
||||||
|
|
@ -258,29 +276,49 @@ def build_index(db=None):
|
||||||
own_db = db is None
|
own_db = db is None
|
||||||
if own_db:
|
if own_db:
|
||||||
db = get_db()
|
db = get_db()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rows = db.execute("SELECT id, embedding FROM chunks ORDER BY id").fetchall()
|
total = db.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
|
||||||
|
if total == 0:
|
||||||
|
with _hnsw_lock:
|
||||||
|
_hnsw_index = None
|
||||||
|
_hnsw_ids = []
|
||||||
|
return
|
||||||
|
|
||||||
|
all_ids = []
|
||||||
|
all_embeddings = []
|
||||||
|
|
||||||
|
for offset in range(0, total, BATCH_SIZE):
|
||||||
|
rows = db.execute(
|
||||||
|
"SELECT id, embedding FROM chunks ORDER BY id LIMIT ? OFFSET ?",
|
||||||
|
(BATCH_SIZE, offset),
|
||||||
|
).fetchall()
|
||||||
|
for r in rows:
|
||||||
|
emb = np.frombuffer(r["embedding"], dtype=np.float32)
|
||||||
|
if emb.dtype == np.float16:
|
||||||
|
emb = emb.astype(np.float32)
|
||||||
|
all_ids.append(r["id"])
|
||||||
|
all_embeddings.append(emb)
|
||||||
finally:
|
finally:
|
||||||
if own_db:
|
if own_db:
|
||||||
return_db(db)
|
return_db(db)
|
||||||
|
|
||||||
with _hnsw_lock:
|
if not all_ids:
|
||||||
if not rows:
|
with _hnsw_lock:
|
||||||
_hnsw_index = None
|
_hnsw_index = None
|
||||||
_hnsw_ids = []
|
_hnsw_ids = []
|
||||||
return
|
return
|
||||||
|
|
||||||
n = len(rows)
|
matrix = np.stack(all_embeddings)
|
||||||
ids = [r["id"] for r in rows]
|
n = len(all_ids)
|
||||||
matrix = np.frombuffer(b"".join(r["embedding"] for r in rows), dtype=np.float32).reshape(n, DIMS)
|
ids = all_ids
|
||||||
|
|
||||||
index = hnswlib.Index(space="cosine", dim=DIMS)
|
index = hnswlib.Index(space="cosine", dim=DIMS)
|
||||||
# ef_construction and M balance build speed vs recall;
|
index.init_index(max_elements=max(n, 1024), ef_construction=200, M=16)
|
||||||
# these defaults give >99% recall at reasonable build time
|
index.add_items(matrix, list(range(n)))
|
||||||
index.init_index(max_elements=max(n, 1024), ef_construction=200, M=16)
|
index.set_ef(50)
|
||||||
index.add_items(matrix, list(range(n)))
|
|
||||||
index.set_ef(50) # query-time accuracy parameter
|
|
||||||
|
|
||||||
|
with _hnsw_lock:
|
||||||
_hnsw_index = index
|
_hnsw_index = index
|
||||||
_hnsw_ids = ids
|
_hnsw_ids = ids
|
||||||
|
|
||||||
|
|
@ -319,8 +357,8 @@ def store_embeddings(page_id, title, body, db):
|
||||||
return
|
return
|
||||||
|
|
||||||
embeddings_matrix = embed(chunks)
|
embeddings_matrix = embed(chunks)
|
||||||
|
embeddings_matrix = _decompress(embeddings_matrix)
|
||||||
|
|
||||||
# Delete old chunks for this page
|
|
||||||
db.execute("DELETE FROM chunks WHERE page_id = ?", (page_id,))
|
db.execute("DELETE FROM chunks WHERE page_id = ?", (page_id,))
|
||||||
|
|
||||||
new_ids = []
|
new_ids = []
|
||||||
|
|
@ -343,6 +381,7 @@ def store_remote_embeddings(remote_page_id, title, note, db):
|
||||||
return
|
return
|
||||||
|
|
||||||
embeddings_matrix = embed([text])
|
embeddings_matrix = embed([text])
|
||||||
|
embeddings_matrix = _decompress(embeddings_matrix)
|
||||||
|
|
||||||
db.execute("DELETE FROM chunks WHERE remote_page_id = ?", (remote_page_id,))
|
db.execute("DELETE FROM chunks WHERE remote_page_id = ?", (remote_page_id,))
|
||||||
cursor = db.execute(
|
cursor = db.execute(
|
||||||
|
|
|
||||||
51
handlers.py
51
handlers.py
|
|
@ -684,10 +684,16 @@ def handle_bookmark(query):
|
||||||
return _text_response(msg, headers={"Access-Control-Allow-Origin": "*"})
|
return _text_response(msg, headers={"Access-Control-Allow-Origin": "*"})
|
||||||
|
|
||||||
|
|
||||||
|
MAX_EXPORT = 10000
|
||||||
|
|
||||||
def handle_export():
|
def handle_export():
|
||||||
|
batch = int((query or {}).get("batch", ["0"])[0])
|
||||||
db = get_db()
|
db = get_db()
|
||||||
try:
|
try:
|
||||||
rows = db.execute("SELECT url, title, note FROM pages ORDER BY id").fetchall()
|
rows = db.execute(
|
||||||
|
"SELECT url, title, note FROM pages ORDER BY id LIMIT ? OFFSET ?",
|
||||||
|
(MAX_EXPORT, batch * MAX_EXPORT),
|
||||||
|
).fetchall()
|
||||||
finally:
|
finally:
|
||||||
return_db(db)
|
return_db(db)
|
||||||
data = [{"url": r["url"], "title": r["title"], "note": r["note"]} for r in rows]
|
data = [{"url": r["url"], "title": r["title"], "note": r["note"]} for r in rows]
|
||||||
|
|
@ -752,6 +758,8 @@ def handle_style_form(msg=""):
|
||||||
dimmed = ' style="opacity:0.4"' if semantic != "1" else ""
|
dimmed = ' style="opacity:0.4"' if semantic != "1" else ""
|
||||||
transport_host = get_setting("transport_host", "reticulum.derickphan.com")
|
transport_host = get_setting("transport_host", "reticulum.derickphan.com")
|
||||||
transport_port = get_setting("transport_port", "4242")
|
transport_port = get_setting("transport_port", "4242")
|
||||||
|
compress = get_setting("compress_embeddings", "0")
|
||||||
|
compress_checked = " checked" if compress == "1" else ""
|
||||||
return _respond(
|
return _respond(
|
||||||
f"<h1>customize</h1>"
|
f"<h1>customize</h1>"
|
||||||
f"<h2>name your search engine</h2>"
|
f"<h2>name your search engine</h2>"
|
||||||
|
|
@ -778,6 +786,9 @@ def handle_style_form(msg=""):
|
||||||
f'<label><input type="checkbox" id="reranker" name="use_reranker" value="1"{reranker_checked}{disabled}>'
|
f'<label><input type="checkbox" id="reranker" name="use_reranker" value="1"{reranker_checked}{disabled}>'
|
||||||
f" cross-encoder reranking (more accurate)</label><br>"
|
f" cross-encoder reranking (more accurate)</label><br>"
|
||||||
f"<small>Uses a 22MB model. Adds ~50ms per search. Disable for faster results.</small><br><br>"
|
f"<small>Uses a 22MB model. Adds ~50ms per search. Disable for faster results.</small><br><br>"
|
||||||
|
f'<label><input type="checkbox" name="compress_embeddings" value="1"{compress_checked}{disabled}>'
|
||||||
|
f" compress embeddings (50% storage savings)</label><br>"
|
||||||
|
f"<small>Saves ~50% on storage for embeddings. Slight quality reduction at large scale.</small><br><br>"
|
||||||
f'<a href="/reindex">manage semantic index</a><br><br>'
|
f'<a href="/reindex">manage semantic index</a><br><br>'
|
||||||
f"</div>"
|
f"</div>"
|
||||||
f"<h2>custom html</h2>"
|
f"<h2>custom html</h2>"
|
||||||
|
|
@ -794,6 +805,11 @@ def handle_style_form(msg=""):
|
||||||
f'{_csrf_field()}'
|
f'{_csrf_field()}'
|
||||||
f'<button type="submit">reset template to default</button>'
|
f'<button type="submit">reset template to default</button>'
|
||||||
f"</form>"
|
f"</form>"
|
||||||
|
f"<h2>maintenance</h2>"
|
||||||
|
f'<form method="post" action="/style/vacuum">'
|
||||||
|
f'{_csrf_field()}'
|
||||||
|
f'<button type="submit">vacuum database</button>'
|
||||||
|
f"</form>"
|
||||||
f"<p>{msg}</p>"
|
f"<p>{msg}</p>"
|
||||||
f'<a href="/">back</a>',
|
f'<a href="/">back</a>',
|
||||||
use_default=True,
|
use_default=True,
|
||||||
|
|
@ -806,6 +822,7 @@ def handle_style_submit(body):
|
||||||
sharing = "1" if body.get("sharing_enabled") else "0"
|
sharing = "1" if body.get("sharing_enabled") else "0"
|
||||||
semantic = "1" if body.get("semantic_search") else "0"
|
semantic = "1" if body.get("semantic_search") else "0"
|
||||||
reranker = "1" if body.get("use_reranker") else "0"
|
reranker = "1" if body.get("use_reranker") else "0"
|
||||||
|
compress = "1" if body.get("compress_embeddings") else "0"
|
||||||
transport_host = body.get("transport_host", [""])[0].strip()
|
transport_host = body.get("transport_host", [""])[0].strip()
|
||||||
transport_port = body.get("transport_port", [""])[0].strip()
|
transport_port = body.get("transport_port", [""])[0].strip()
|
||||||
set_setting("custom_template", template if template.strip() != DEFAULT_TEMPLATE.strip() else "")
|
set_setting("custom_template", template if template.strip() != DEFAULT_TEMPLATE.strip() else "")
|
||||||
|
|
@ -813,6 +830,7 @@ def handle_style_submit(body):
|
||||||
set_setting("sharing_enabled", sharing)
|
set_setting("sharing_enabled", sharing)
|
||||||
set_setting("semantic_search", semantic)
|
set_setting("semantic_search", semantic)
|
||||||
set_setting("use_reranker", reranker)
|
set_setting("use_reranker", reranker)
|
||||||
|
set_setting("compress_embeddings", compress)
|
||||||
if transport_host:
|
if transport_host:
|
||||||
set_setting("transport_host", transport_host)
|
set_setting("transport_host", transport_host)
|
||||||
if transport_port:
|
if transport_port:
|
||||||
|
|
@ -930,6 +948,8 @@ def handle_tag_browse(tag_name, query=None):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MAX_API_SITES = 5000
|
||||||
|
|
||||||
def handle_api_sites(query=None):
|
def handle_api_sites(query=None):
|
||||||
if get_setting("sharing_enabled", "0") != "1":
|
if get_setting("sharing_enabled", "0") != "1":
|
||||||
return _json_response(
|
return _json_response(
|
||||||
|
|
@ -943,11 +963,14 @@ def handle_api_sites(query=None):
|
||||||
if since:
|
if since:
|
||||||
rows = db.execute(
|
rows = db.execute(
|
||||||
"SELECT id, url, title, note, last_modified FROM pages "
|
"SELECT id, url, title, note, last_modified FROM pages "
|
||||||
"WHERE last_modified > ? ORDER BY id DESC",
|
"WHERE last_modified > ? ORDER BY id DESC LIMIT ?",
|
||||||
(since,),
|
(since, MAX_API_SITES),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
rows = db.execute("SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC").fetchall()
|
rows = db.execute(
|
||||||
|
"SELECT id, url, title, note, last_modified FROM pages ORDER BY id DESC LIMIT ?",
|
||||||
|
(MAX_API_SITES,),
|
||||||
|
).fetchall()
|
||||||
sites = []
|
sites = []
|
||||||
for r in rows:
|
for r in rows:
|
||||||
tags = _get_page_tags(r["id"], db)
|
tags = _get_page_tags(r["id"], db)
|
||||||
|
|
@ -955,8 +978,10 @@ def handle_api_sites(query=None):
|
||||||
"url": r["url"], "title": r["title"], "note": r["note"],
|
"url": r["url"], "title": r["title"], "note": r["note"],
|
||||||
"tags": tags, "last_modified": r["last_modified"] or "",
|
"tags": tags, "last_modified": r["last_modified"] or "",
|
||||||
})
|
})
|
||||||
# Include list of all current URLs so subscriber can detect deletions
|
# Include list of all current URLs so subscriber can detect deletions (limited)
|
||||||
all_urls = [r["url"] for r in db.execute("SELECT url FROM pages").fetchall()] if not since else None
|
all_urls = None
|
||||||
|
if not since:
|
||||||
|
all_urls = [r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_API_SITES,)).fetchall()]
|
||||||
finally:
|
finally:
|
||||||
return_db(db)
|
return_db(db)
|
||||||
data = {"name": get_site_name(), "sites": sites}
|
data = {"name": get_site_name(), "sites": sites}
|
||||||
|
|
@ -1040,18 +1065,20 @@ def handle_subscription_add(body):
|
||||||
return handle_subscriptions(f"Subscribed to {esc(name or dest_hash)}.")
|
return handle_subscriptions(f"Subscribed to {esc(name or dest_hash)}.")
|
||||||
|
|
||||||
|
|
||||||
|
MAX_BROWSE = 5000
|
||||||
|
|
||||||
def handle_subscription_browse(sub_id):
|
def handle_subscription_browse(sub_id):
|
||||||
db = get_db()
|
db = get_db()
|
||||||
try:
|
try:
|
||||||
sub = db.execute("SELECT * FROM subscriptions WHERE id = ?", (sub_id,)).fetchone()
|
sub = db.execute("SELECT * FROM subscriptions WHERE id = ?", (sub_id,)).fetchone()
|
||||||
if not sub:
|
if not sub:
|
||||||
return _error(404)
|
return _error(404)
|
||||||
local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages").fetchall())
|
local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_BROWSE,)).fetchall())
|
||||||
|
|
||||||
# Use locally synced data if available, otherwise fetch live
|
# Use locally synced data if available, otherwise fetch live
|
||||||
remote_rows = db.execute(
|
remote_rows = db.execute(
|
||||||
"SELECT url, title, note, tags FROM remote_pages WHERE subscription_id = ?",
|
"SELECT url, title, note, tags FROM remote_pages WHERE subscription_id = ? LIMIT ?",
|
||||||
(sub_id,),
|
(sub_id, MAX_BROWSE),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
finally:
|
finally:
|
||||||
return_db(db)
|
return_db(db)
|
||||||
|
|
@ -1121,7 +1148,7 @@ def handle_subscription_pick(body):
|
||||||
remote_tags = {r["url"]: r["tags"] for r in remote_rows}
|
remote_tags = {r["url"]: r["tags"] for r in remote_rows}
|
||||||
|
|
||||||
if import_all:
|
if import_all:
|
||||||
local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages").fetchall())
|
local_urls = set(r["url"] for r in db.execute("SELECT url FROM pages LIMIT ?", (MAX_BROWSE,)).fetchall())
|
||||||
urls = [r["url"] for r in remote_rows if r["url"] not in local_urls]
|
urls = [r["url"] for r in remote_rows if r["url"] not in local_urls]
|
||||||
else:
|
else:
|
||||||
urls = body.get("urls", [])
|
urls = body.get("urls", [])
|
||||||
|
|
@ -1425,6 +1452,10 @@ def _dispatch_inner(data):
|
||||||
elif path == "/style/reset":
|
elif path == "/style/reset":
|
||||||
set_setting("custom_template", "")
|
set_setting("custom_template", "")
|
||||||
return handle_style_form("Template reset to default.")
|
return handle_style_form("Template reset to default.")
|
||||||
|
elif path == "/style/vacuum":
|
||||||
|
from db import vacuum_db
|
||||||
|
vacuum_db()
|
||||||
|
return handle_style_form("Database vacuumed.")
|
||||||
elif path == "/import":
|
elif path == "/import":
|
||||||
return handle_import_submit(body)
|
return handle_import_submit(body)
|
||||||
elif path == "/reindex":
|
elif path == "/reindex":
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue