Fixed edge-case domains (strip "www." only when the bare hostname resolves)
This commit is contained in:
parent
254cf562c3
commit
6ffd38d58c
1 changed file with 7 additions and 1 deletion
8
db.py
8
db.py
|
|
@@ -70,10 +70,16 @@ def clean_url(url):
|
||||||
# Prefer https
|
# Prefer https
|
||||||
scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme
|
scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme
|
||||||
|
|
||||||
# Normalize hostname: lowercase, strip www.
|
# Normalize hostname: lowercase, strip www (only if non-www resolves)
|
||||||
hostname = (parsed.hostname or "").lower()
|
hostname = (parsed.hostname or "").lower()
|
||||||
|
original_hostname = hostname
|
||||||
if hostname.startswith("www."):
|
if hostname.startswith("www."):
|
||||||
hostname = hostname[4:]
|
hostname = hostname[4:]
|
||||||
|
port = parsed.port or (443 if scheme == "https" else 80)
|
||||||
|
try:
|
||||||
|
socket.getaddrinfo(hostname, port, proto=socket.IPPROTO_TCP)
|
||||||
|
except socket.gaierror:
|
||||||
|
hostname = original_hostname
|
||||||
|
|
||||||
# Preserve explicit non-default ports
|
# Preserve explicit non-default ports
|
||||||
port = parsed.port
|
port = parsed.port
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue