Added manual entry
This commit is contained in:
parent
c959ee98ae
commit
5593d802b3
2 changed files with 76 additions and 2 deletions
16
db.py
16
db.py
|
|
@ -20,6 +20,18 @@ BLOCKED_NETWORKS = [
|
|||
]
|
||||
|
||||
|
||||
def _is_blocked_response(html, status_code):
|
||||
"""Check if response is a CDN challenge/block page."""
|
||||
if status_code == 403:
|
||||
return True
|
||||
html_lower = html.lower()
|
||||
if "just a moment" in html_lower or "cloudflare" in html_lower:
|
||||
return True
|
||||
if "enable javascript and cookies" in html_lower:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _validate_url_target(url):
|
||||
"""Resolve hostname and block private/internal IPs to prevent SSRF."""
|
||||
parsed = urlparse(url)
|
||||
|
|
@ -281,6 +293,10 @@ def get_site_name():
|
|||
def fetch_page(url):
|
||||
_validate_url_target(url)
|
||||
resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False)
|
||||
|
||||
if _is_blocked_response(resp.text, resp.status_code):
|
||||
raise Exception("Site blocks automated access (Cloudflare/CDN protection)")
|
||||
|
||||
# Follow redirects manually, re-validating each target
|
||||
max_redirects = 5
|
||||
while resp.is_redirect and max_redirects > 0:
|
||||
|
|
|
|||
62
handlers.py
62
handlers.py
|
|
@ -357,10 +357,13 @@ def handle_add_submit(body):
|
|||
url = clean_url(body.get("url", [""])[0].strip())
|
||||
note = body.get("note", [""])[0].strip()
|
||||
tags = body.get("tags", [""])[0].strip()
|
||||
|
||||
if not url:
|
||||
return handle_add_form("URL is required.")
|
||||
if not url.startswith(("http://", "https://")):
|
||||
return handle_add_form("URL must start with http:// or https://")
|
||||
|
||||
# Try auto-index first
|
||||
try:
|
||||
title = index_url(url, note)
|
||||
if tags:
|
||||
|
|
@ -373,10 +376,63 @@ def handle_add_submit(body):
|
|||
finally:
|
||||
return_db(db)
|
||||
return handle_add_form(f'Indexed: <a href="{esc(url)}">{esc(title)}</a>')
|
||||
|
||||
except ValueError as e:
|
||||
return handle_add_form(f"Error: {esc(str(e))}")
|
||||
except Exception:
|
||||
return handle_add_form("Error: could not fetch or index that URL.")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
# Check if it's a block response
|
||||
if "block" in error_msg or "cloudflare" in error_msg or "403" in error_msg:
|
||||
# Show manual entry form for blocked sites
|
||||
return _respond(
|
||||
f"<h1>add url (manual entry)</h1>"
|
||||
f"<p><strong>{esc(url)}</strong> blocks automated access. "
|
||||
f"You can still save it manually:</p>"
|
||||
f'<form method="post" action="/add/manual">'
|
||||
f'{_csrf_field()}'
|
||||
f'<input type="hidden" name="url" value="{esc(url)}">'
|
||||
f'<input type="hidden" name="note" value="{esc(note)}">'
|
||||
f'<input type="hidden" name="tags" value="{esc(tags)}">'
|
||||
f'<label>Title:</label><br>'
|
||||
f'<input name="manual_title" size="50" placeholder="page title" required><br><br>'
|
||||
f'<label>Description:</label><br>'
|
||||
f'<textarea name="manual_description" rows="4" cols="50" placeholder="what is this site about?" required></textarea><br><br>'
|
||||
f'<button type="submit">save manually</button>'
|
||||
f"</form>"
|
||||
f'<a href="/">back</a>'
|
||||
)
|
||||
return handle_add_form(f"Error: could not fetch or index that URL. {esc(str(e)[:100])}")
|
||||
|
||||
|
||||
def handle_add_manual_submit(body):
    """Save a page from the manual-entry form without fetching the URL.

    Args:
        body: Parsed form data mapping each field name to a list of values.
            Fields read: ``url``, ``note``, ``tags``, ``manual_title`` and
            ``manual_description``.

    Returns:
        The response produced by ``handle_add_form``: a validation message
        when a field is missing or the URL scheme is not http(s), otherwise
        a confirmation linking to the saved URL.
    """
    # Imported locally so this block does not rely on a module-level import.
    from datetime import datetime

    url = clean_url(body.get("url", [""])[0].strip())
    note = body.get("note", [""])[0].strip()
    tags = body.get("tags", [""])[0].strip()
    manual_title = body.get("manual_title", [""])[0].strip()
    manual_desc = body.get("manual_description", [""])[0].strip()

    if not url:
        return handle_add_form("URL is required.")
    # The url arrives in a hidden form field, which the client can change.
    # Apply the same scheme check as handle_add_submit so that non-http(s)
    # URLs (e.g. javascript:) are never stored or rendered as a link.
    if not url.startswith(("http://", "https://")):
        return handle_add_form("URL must start with http:// or https://")
    if not manual_title or not manual_desc:
        return handle_add_form("Title and description are required for manual entry.")

    db = get_db()
    try:
        now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        # Upsert keyed on url: re-submitting an existing URL overwrites its
        # title, body, note, timestamp and summary.
        db.execute(
            "INSERT INTO pages (url, title, body, note, last_modified, summary) VALUES (?, ?, ?, ?, ?, ?) "
            "ON CONFLICT(url) DO UPDATE SET title=excluded.title, body=excluded.body, "
            "note=excluded.note, last_modified=excluded.last_modified, summary=excluded.summary",
            # The description doubles as the body; its first 200 characters
            # become the summary.
            (url, manual_title, manual_desc, note, now, manual_desc[:200]),
        )
        # Look the id up by url because the upsert may have updated an
        # existing row rather than inserted a new one.
        page_id = db.execute("SELECT id FROM pages WHERE url = ?", (url,)).fetchone()[0]
        if tags:
            _set_page_tags(page_id, tags, db)
        db.commit()
        return handle_add_form(f'Added manually: <a href="{esc(url)}">{esc(manual_title)}</a>')
    finally:
        # Hand the connection back whether or not the write succeeded.
        return_db(db)
|
||||
|
||||
|
||||
def handle_pages(query=None):
|
||||
|
|
@ -1209,6 +1265,8 @@ def _dispatch_inner(data):
|
|||
return _respond("<h1>403 Forbidden</h1><p>Invalid or missing CSRF token.</p>", status=403)
|
||||
if path == "/add":
|
||||
return handle_add_submit(body)
|
||||
elif path == "/add/manual":
|
||||
return handle_add_manual_submit(body)
|
||||
elif path.startswith("/edit/"):
|
||||
pid = extract_id("/edit/")
|
||||
return handle_edit_submit(pid, body) if pid is not None else _error(400)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue