fixed edge-case domains

This commit is contained in:
lichenblankie 2026-04-18 22:52:24 +00:00
parent 4064a46c8a
commit 2dbbc5a538

8
db.py
View file

@@ -70,10 +70,16 @@ def clean_url(url):
# Prefer https # Prefer https
scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme
# Normalize hostname: lowercase, strip www. # Normalize hostname: lowercase, strip www (only if non-www resolves)
hostname = (parsed.hostname or "").lower() hostname = (parsed.hostname or "").lower()
original_hostname = hostname
if hostname.startswith("www."): if hostname.startswith("www."):
hostname = hostname[4:] hostname = hostname[4:]
port = parsed.port or (443 if scheme == "https" else 80)
try:
socket.getaddrinfo(hostname, port, proto=socket.IPPROTO_TCP)
except socket.gaierror:
hostname = original_hostname
# Preserve explicit non-default ports # Preserve explicit non-default ports
port = parsed.port port = parsed.port