This commit is contained in:
2026-01-12 04:05:52 -08:00
parent 6076ea307b
commit 9981424397
11 changed files with 646 additions and 682 deletions

View File

@@ -54,6 +54,18 @@ def _extend_namespaced(
_append_unique(target, seen, f"{namespace}:{val}")
def _add_tag(tags: List[str], namespace: str, value: str) -> None:
    """Append ``namespace:<normalized value>`` to *tags* in place, once.

    The value is passed through ``value_normalize`` (defined elsewhere in
    this module) before the tag is built.  Nothing is appended when:

    * ``namespace`` or ``value`` is empty/falsy,
    * ``value_normalize`` returns an empty/falsy result, or
    * the resulting ``namespace:value`` string is already in ``tags``.

    Args:
        tags: List of tag strings; mutated in place.
        namespace: Prefix placed before the colon.
        value: Raw tag value to normalize and append.
    """
    if namespace and value:
        cleaned = value_normalize(value)
        if cleaned:
            entry = f"{namespace}:{cleaned}"
            # Membership test keeps the list free of duplicate entries.
            if entry not in tags:
                tags.append(entry)
def _coerce_duration(metadata: Dict[str, Any]) -> Optional[float]:
for key in ("duration", "duration_seconds", "length", "duration_sec"):
value = metadata.get(key)
@@ -355,6 +367,17 @@ def normalize_urls(value: Any) -> List[str]:
if not u:
return None
# --- HEURISTIC FILTER ---
# Ensure it actually looks like a URL/identifier to avoid tag leakage.
# This prevents plain tags ("adam22", "10 books") from entering the URL list.
low = u.lower()
has_scheme = low.startswith((
"http://", "https://", "magnet:", "torrent:", "tidal:",
"hydrus:", "ytdl:", "soulseek:", "matrix:", "file:"
))
if not (has_scheme or "://" in low):
return None
# IMPORTANT: URLs can be case-sensitive in the path/query on some hosts
# (e.g., https://0x0.st/PzGY.webp). Do not lowercase or otherwise rewrite
# the URL here; preserve exact casing and percent-encoding.