This commit is contained in:
2026-01-24 01:38:12 -08:00
parent 4e4c374908
commit 3a4d3f029d
5 changed files with 210 additions and 229 deletions

View File

@@ -1,5 +1,5 @@
import re
from typing import Any, Dict, List, Set
from typing import Any, Dict, List, Optional, Set
def value_normalize(value: Any) -> str:
@@ -19,6 +19,18 @@ def _add_tag(tags: List[str], namespace: str, value: str) -> None:
tags.append(candidate)
def _extract_channel_from_tag(tag_value: str) -> Optional[str]:
"""Return the channel value if tag_value is namespaced with channel."""
if not tag_value:
return None
normalized = tag_value.strip().lower()
if not normalized.startswith("channel:"):
return None
_, _, remainder = normalized.partition(":")
remainder = remainder.strip()
return remainder or None
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
""" """
tags: List[str] = []
@@ -67,7 +79,12 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
for tag_value in tags_field:
if tag_value:
normalized = value_normalize(str(tag_value))
if normalized and normalized not in tags:
if not normalized:
continue
channel_candidate = _extract_channel_from_tag(normalized)
if channel_candidate:
_add_tag(tags, "channel", channel_candidate)
if normalized not in tags:
tags.append(normalized)
elif isinstance(tags_field, dict):
# Tags is dict: {"key": "val"} → tag:key:val
@@ -83,10 +100,16 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
if tag_str:
for tag_value in re.split(r'[,\s]+', tag_str):
tag_value = tag_value.strip()
if tag_value:
normalized = value_normalize(tag_value)
if normalized and normalized not in tags:
tags.append(normalized)
if not tag_value:
continue
normalized = value_normalize(tag_value)
if not normalized:
continue
channel_candidate = _extract_channel_from_tag(normalized)
if channel_candidate:
_add_tag(tags, "channel", channel_candidate)
if normalized not in tags:
tags.append(normalized)
# Extract chapters as tags if present
chapters = entry.get("chapters")