This commit is contained in:
2026-01-12 13:51:26 -08:00
parent b7b58f0e42
commit 065ceeb1da
5 changed files with 172 additions and 165 deletions

View File

@@ -335,7 +335,22 @@ def normalize_urls(value: Any) -> List[str]:
" ").replace(",",
" ").split():
if token:
yield token
t_low = token.lower()
# Heuristic: only yield tokens that look like URLs or common address patterns.
# This prevents plain tags (e.g. "tag1, tag2") from leaking into URL fields.
is_p_url = t_low.startswith(("http://",
"https://",
"magnet:",
"torrent:",
"ytdl://",
"data:",
"ftp:",
"sftp:"))
is_struct_url = ("." in token and "/" in token
and not token.startswith((".",
"/")))
if is_p_url or is_struct_url:
yield token
return
if isinstance(raw, (list, tuple, set)):