From 570d876b8e43965e4d604337888f9bc4bc687dcb Mon Sep 17 00:00:00 2001
From: Derick Phan <derickphan@fico.com>
Date: Fri, 27 Mar 2026 14:18:54 -0700
Subject: [PATCH] Strip noscript tags when parsing pages to remove JS-disabled
 messages

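Lemmy and other JS-heavy sites include <noscript> fallback text such as
"Javascript is disabled", which pollutes the stored body text and the
generated snippets/summaries. Add "noscript" to the list of tags that
fetch_page() decomposes before extracting the body text.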

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/db.py b/db.py
index 6b225a2..a6d8008 100644
--- a/db.py
+++ b/db.py
@@ -328,7 +328,7 @@ def fetch_page(url):
         if og_tag and og_tag.get("content"):
             meta_desc = og_tag["content"].strip()
 
-    for tag in soup(["script", "style", "nav", "footer", "header"]):
+    for tag in soup(["script", "style", "nav", "footer", "header", "noscript"]):
         tag.decompose()
     title = soup.title.string.strip() if soup.title and soup.title.string else url
     body = soup.get_text(separator=" ", strip=True)