From 6ffd38d58cd0b4ed0e4b69532b028578b2a17705 Mon Sep 17 00:00:00 2001 From: lichenblankie Date: Sat, 18 Apr 2026 22:52:24 +0000 Subject: [PATCH] Fixed edge case domains --- db.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/db.py b/db.py index 058822c..9ae6f55 100644 --- a/db.py +++ b/db.py @@ -70,10 +70,16 @@ def clean_url(url): # Prefer https scheme = "https" if parsed.scheme in ("http", "https") else parsed.scheme - # Normalize hostname: lowercase, strip www. + # Normalize hostname: lowercase, strip www (only if non-www resolves) hostname = (parsed.hostname or "").lower() + original_hostname = hostname if hostname.startswith("www."): hostname = hostname[4:] + port = parsed.port or (443 if scheme == "https" else 80) + try: + socket.getaddrinfo(hostname, port, proto=socket.IPPROTO_TCP) + except socket.gaierror: + hostname = original_hostname # Preserve explicit non-default ports port = parsed.port