fixed edge-case domains
This commit is contained in:
parent
4064a46c8a
commit
2dbbc5a538
1 changed file with 7 additions and 1 deletion
8
db.py
8
db.py
|
|
@@ -70,10 +70,16 @@ def clean_url(url):
|
|||
# Prefer https
|
||||
scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme
|
||||
|
||||
# Normalize hostname: lowercase, strip www.
|
||||
# Normalize hostname: lowercase, strip www (only if non-www resolves)
|
||||
hostname = (parsed.hostname or "").lower()
|
||||
original_hostname = hostname
|
||||
if hostname.startswith("www."):
|
||||
hostname = hostname[4:]
|
||||
port = parsed.port or (443 if scheme == "https" else 80)
|
||||
try:
|
||||
socket.getaddrinfo(hostname, port, proto=socket.IPPROTO_TCP)
|
||||
except socket.gaierror:
|
||||
hostname = original_hostname
|
||||
|
||||
# Preserve explicit non-default ports
|
||||
port = parsed.port
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue