Updated manual entry
This commit is contained in:
parent
426aa670fa
commit
6119ed3aef
1 changed file with 5 additions and 1 deletion
6
db.py
6
db.py
|
|
@@ -29,6 +29,10 @@ def _is_blocked_response(html, status_code):
|
||||||
return True
|
return True
|
||||||
if "enable javascript and cookies" in html_lower:
|
if "enable javascript and cookies" in html_lower:
|
||||||
return True
|
return True
|
||||||
|
if "request rejected" in html_lower:
|
||||||
|
return True
|
||||||
|
if "access denied" in html_lower:
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -295,7 +299,7 @@ def fetch_page(url):
|
||||||
resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False)
|
resp = requests.get(url, timeout=10, headers={"User-Agent": "TinyWeb/1.0"}, allow_redirects=False)
|
||||||
|
|
||||||
if _is_blocked_response(resp.text, resp.status_code):
|
if _is_blocked_response(resp.text, resp.status_code):
|
||||||
raise Exception("Site blocks automated access (Cloudflare/CDN protection)")
|
raise Exception(f"Site blocks automated access: {resp.status_code}")
|
||||||
|
|
||||||
# Follow redirects manually, re-validating each target
|
# Follow redirects manually, re-validating each target
|
||||||
max_redirects = 5
|
max_redirects = 5
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue