f
This commit is contained in:
@@ -54,6 +54,18 @@ def _extend_namespaced(
|
||||
_append_unique(target, seen, f"{namespace}:{val}")
|
||||
|
||||
|
||||
def _add_tag(tags: List[str], namespace: str, value: str) -> None:
    """Append ``namespace:value`` to *tags* unless that entry already exists.

    The raw *value* is passed through ``value_normalize`` before the tag is
    built. Nothing is added when *namespace* or *value* is falsy, or when the
    normalized value is falsy. *tags* is mutated in place; nothing is returned.
    """
    # Only call the normalizer when both inputs are usable.
    cleaned = value_normalize(value) if (namespace and value) else None
    if not cleaned:
        return

    entry = f"{namespace}:{cleaned}"
    # Membership test keeps the list free of duplicate entries.
    if entry in tags:
        return
    tags.append(entry)
|
||||
|
||||
|
||||
def _coerce_duration(metadata: Dict[str, Any]) -> Optional[float]:
|
||||
for key in ("duration", "duration_seconds", "length", "duration_sec"):
|
||||
value = metadata.get(key)
|
||||
@@ -355,6 +367,17 @@ def normalize_urls(value: Any) -> List[str]:
|
||||
if not u:
|
||||
return None
|
||||
|
||||
# --- HEURISTIC FILTER ---
|
||||
# Ensure it actually looks like a URL/identifier to avoid tag leakage.
|
||||
# This prevents plain tags ("adam22", "10 books") from entering the URL list.
|
||||
low = u.lower()
|
||||
has_scheme = low.startswith((
|
||||
"http://", "https://", "magnet:", "torrent:", "tidal:",
|
||||
"hydrus:", "ytdl:", "soulseek:", "matrix:", "file:"
|
||||
))
|
||||
if not (has_scheme or "://" in low):
|
||||
return None
|
||||
|
||||
# IMPORTANT: URLs can be case-sensitive in the path/query on some hosts
|
||||
# (e.g., https://0x0.st/PzGY.webp). Do not lowercase or otherwise rewrite
|
||||
# the URL here; preserve exact casing and percent-encoding.
|
||||
|
||||
Reference in New Issue
Block a user