Added manual entry

This commit is contained in:
blankie 2026-03-28 21:24:10 -07:00
parent c959ee98ae
commit 5593d802b3
2 changed files with 76 additions and 2 deletions

16
db.py
View file

@ -20,6 +20,18 @@ BLOCKED_NETWORKS = [
] ]
def _is_blocked_response(html, status_code):
"""Check if response is a CDN challenge/block page."""
if status_code == 403:
return True
html_lower = html.lower()
if "just a moment" in html_lower or "cloudflare" in html_lower:
return True
if "enable javascript and cookies" in html_lower:
return True
return False
def _validate_url_target(url): def _validate_url_target(url):
"""Resolve hostname and block private/internal IPs to prevent SSRF.""" """Resolve hostname and block private/internal IPs to prevent SSRF."""
parsed = urlparse(url) parsed = urlparse(url)
@ -281,6 +293,10 @@ def get_site_name():
def fetch_page(url): def fetch_page(url):
_validate_url_target(url) _validate_url_target(url)
resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False) resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False)
if _is_blocked_response(resp.text, resp.status_code):
raise Exception("Site blocks automated access (Cloudflare/CDN protection)")
# Follow redirects manually, re-validating each target # Follow redirects manually, re-validating each target
max_redirects = 5 max_redirects = 5
while resp.is_redirect and max_redirects > 0: while resp.is_redirect and max_redirects > 0:

View file

@ -357,10 +357,13 @@ def handle_add_submit(body):
url = clean_url(body.get("url", [""])[0].strip()) url = clean_url(body.get("url", [""])[0].strip())
note = body.get("note", [""])[0].strip() note = body.get("note", [""])[0].strip()
tags = body.get("tags", [""])[0].strip() tags = body.get("tags", [""])[0].strip()
if not url: if not url:
return handle_add_form("URL is required.") return handle_add_form("URL is required.")
if not url.startswith(("http://", "https://")): if not url.startswith(("http://", "https://")):
return handle_add_form("URL must start with http:// or https://") return handle_add_form("URL must start with http:// or https://")
# Try auto-index first
try: try:
title = index_url(url, note) title = index_url(url, note)
if tags: if tags:
@ -373,10 +376,63 @@ def handle_add_submit(body):
finally: finally:
return_db(db) return_db(db)
return handle_add_form(f'Indexed: <a href="{esc(url)}">{esc(title)}</a>') return handle_add_form(f'Indexed: <a href="{esc(url)}">{esc(title)}</a>')
except ValueError as e: except ValueError as e:
return handle_add_form(f"Error: {esc(str(e))}") return handle_add_form(f"Error: {esc(str(e))}")
except Exception:
return handle_add_form("Error: could not fetch or index that URL.") except Exception as e:
error_msg = str(e).lower()
# Check if it's a block response
if "block" in error_msg or "cloudflare" in error_msg or "403" in error_msg:
# Show manual entry form for blocked sites
return _respond(
f"<h1>add url (manual entry)</h1>"
f"<p><strong>{esc(url)}</strong> blocks automated access. "
f"You can still save it manually:</p>"
f'<form method="post" action="/add/manual">'
f'{_csrf_field()}'
f'<input type="hidden" name="url" value="{esc(url)}">'
f'<input type="hidden" name="note" value="{esc(note)}">'
f'<input type="hidden" name="tags" value="{esc(tags)}">'
f'<label>Title:</label><br>'
f'<input name="manual_title" size="50" placeholder="page title" required><br><br>'
f'<label>Description:</label><br>'
f'<textarea name="manual_description" rows="4" cols="50" placeholder="what is this site about?" required></textarea><br><br>'
f'<button type="submit">save manually</button>'
f"</form>"
f'<a href="/">back</a>'
)
return handle_add_form(f"Error: could not fetch or index that URL. {esc(str(e)[:100])}")
def handle_add_manual_submit(body):
    """Save a page using a user-supplied title and description.

    Handles the POST from the "manual entry" form. The submitted title is
    stored as the page title, the description as the page body, and the
    first 200 characters of the description as the summary. An existing
    row with the same URL is overwritten.

    Args:
        body: Parsed form data mapping each field name to a list of
            values. Reads ``url``, ``note``, ``tags``, ``manual_title``
            and ``manual_description``.

    Returns:
        The result of ``handle_add_form`` carrying either a validation
        error message or a confirmation link to the saved page.
    """
    # Local import keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    from datetime import datetime

    url = clean_url(body.get("url", [""])[0].strip())
    note = body.get("note", [""])[0].strip()
    tags = body.get("tags", [""])[0].strip()
    manual_title = body.get("manual_title", [""])[0].strip()
    manual_desc = body.get("manual_description", [""])[0].strip()

    if not url:
        return handle_add_form("URL is required.")
    # Same scheme check as handle_add_submit. The url field is a hidden
    # form input and can be changed by the client; without this check a
    # "javascript:" URL could be stored and later emitted inside href="...".
    if not url.startswith(("http://", "https://")):
        return handle_add_form("URL must start with http:// or https://")
    if not manual_title or not manual_desc:
        return handle_add_form("Title and description are required for manual entry.")

    db = get_db()
    try:
        now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        # Upsert keyed on url: re-submitting an existing URL replaces its
        # title, body, note, timestamp and summary.
        db.execute(
            "INSERT INTO pages (url, title, body, note, last_modified, summary) VALUES (?, ?, ?, ?, ?, ?) "
            "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, "
            "note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary",
            (url, manual_title, manual_desc, note, now, manual_desc[:200]),
        )
        # Look the id up by url so it is correct for both the insert and
        # the update path of the upsert.
        page_id = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()[0]
        if tags:
            _set_page_tags(page_id, tags, db)
        db.commit()
        return handle_add_form(f'Added manually: <a href="{esc(url)}">{esc(manual_title)}</a>')
    finally:
        # Always hand the connection back, even if a statement raised.
        return_db(db)
def handle_pages(query=None): def handle_pages(query=None):
@ -1209,6 +1265,8 @@ def _dispatch_inner(data):
return _respond("<h1>403 Forbidden</h1><p>Invalid or missing CSRF token.</p>", status=403) return _respond("<h1>403 Forbidden</h1><p>Invalid or missing CSRF token.</p>", status=403)
if path == "/add": if path == "/add":
return handle_add_submit(body) return handle_add_submit(body)
elif path == "/add/manual":
return handle_add_manual_submit(body)
elif path.startswith("/edit/"): elif path.startswith("/edit/"):
pid = extract_id("/edit/") pid = extract_id("/edit/")
return handle_edit_submit(pid, body) if pid is not None else _error(400) return handle_edit_submit(pid, body) if pid is not None else _error(400)