added manual URL entry

This commit is contained in:
lichenblankie 2026-03-28 21:24:10 -07:00
parent 9bc5abd32f
commit a1358c1f3d
2 changed files with 99 additions and 2 deletions

20
db.py
View file

@@ -20,6 +20,22 @@ BLOCKED_NETWORKS = [
]
def _is_blocked_response(html, status_code):
"""Check if response is a CDN challenge/block page."""
if status_code == 403:
return True
html_lower = html.lower()
if "just a moment" in html_lower or "cloudflare" in html_lower:
return True
if "enable javascript and cookies" in html_lower:
return True
if "request rejected" in html_lower:
return True
if "access denied" in html_lower:
return True
return False
def _validate_url_target(url):
"""Resolve hostname and block private/internal IPs to prevent SSRF."""
parsed = urlparse(url)
@@ -281,6 +297,10 @@ def get_site_name():
def fetch_page(url):
_validate_url_target(url)
resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False)
if _is_blocked_response(resp.text, resp.status_code):
raise Exception(f"Site blocks automated access: {resp.status_code}")
# Follow redirects manually, re-validating each target
max_redirects = 5
while resp.is_redirect and max_redirects > 0: