j

2026-01-20 16:42:49 -08:00
parent 1e2054189b
commit 922b649e17
9 changed files with 351 additions and 141 deletions
@@ -7,7 +7,7 @@ from SYS.logger import log, debug
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple

-from API.HydrusNetwork import apply_hydrus_tag_mutation, fetch_hydrus_metadata, fetch_hydrus_metadata_by_url
+from SYS.yt_metadata import extract_ytdlp_tags

 try:  # Optional; used when available for richer metadata fetches
    import yt_dlp
@@ -918,72 +918,7 @@ def apply_tag_mutation(payload: Dict[str,
            }


-def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
-    """ """
-    tags: List[str] = []
-    seen_namespaces: Set[str] = set()

-    # Meaningful yt-dlp fields that should become tags
-    # This mapping excludes technical fields: filesize, duration, format_id, vcodec, acodec, ext, etc.
-    field_to_namespace = {
-        "artist": "artist",
-        "album": "album",
-        "creator": "creator",
-        "uploader": "creator",  # Map uploader to creator (deduplicate)
-        "uploader_id": "creator",
-        "channel": "channel",
-        "genre": "genre",
-        "track": "track",
-        "track_number": "track_number",
-        "release_date": "release_date",
-        "upload_date": "upload_date",
-        "title": "title",
-        "license": "license",
-        "location": "location",
-    }
-
-    # Extract simple field mappings
-    for yt_field, namespace in field_to_namespace.items():
-        value = entry.get(yt_field)
-        if value is not None:
-            value_str = value_normalize(str(value))
-            if value_str:
-                # Prevent duplicate creator tags (only use first creator)
-                if namespace == "creator":
-                    if "creator" in seen_namespaces:
-                        continue
-                    seen_namespaces.add("creator")
-
-                _add_tag(tags, namespace, value_str)
-
-    # Handle tags field specially (could be list, dict, or string)
-    # For list/sequence tags, capture as freeform (no namespace prefix)
-    tags_field = entry.get("tags")
-    if tags_field is not None:
-        if isinstance(tags_field, list):
-            # Tags is list: ["tag1", "tag2", ...] → capture as freeform tags (no "tag:" prefix)
-            # These are typically genre/category tags from the source (BandCamp genres, etc.)
-            for tag_value in tags_field:
-                if tag_value:
-                    normalized = value_normalize(str(tag_value))
-                    if normalized and normalized not in tags:
-                        tags.append(normalized)
-        elif isinstance(tags_field, dict):
-            # Tags is dict: {"key": "val"} → tag:key:val
-            for key, val in tags_field.items():
-                if key and val:
-                    key_normalized = value_normalize(str(key))
-                    val_normalized = value_normalize(str(val))
-                    if key_normalized and val_normalized:
-                        _add_tag(tags, f"tag:{key_normalized}", val_normalized)
-        else:
-            # Tags is string or other: add as freeform
-            if tags_field:
-                normalized = value_normalize(str(tags_field))
-                if normalized and normalized not in tags:
-                    tags.append(normalized)
-
-    return tags


 def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
@@ -2,6 +2,7 @@

 import datetime
 import hashlib
+import inspect
 import json
 import os
 import shutil
@@ -996,16 +997,25 @@ class PipelineLiveProgress:
    def stop(self) -> None:
        # Safe to call whether Live is running or paused.
        if self._live is not None:
+            stop_fn = self._live.stop
+            has_clear = False
            try:
-                try:
-                    self._live.stop(clear=True)
-                except TypeError:
-                    self._live.stop()
-                except Exception:
-                    self._live.stop()
-            except Exception:
+                signature = inspect.signature(stop_fn)
+                has_clear = "clear" in signature.parameters
+            except (ValueError, TypeError):
                pass

+            try:
+                if has_clear:
+                    stop_fn(clear=True)
+                else:
+                    stop_fn()
+            except Exception:
+                try:
+                    stop_fn()
+                except Exception:
+                    pass
+
        self._live = None
        self._console = None
        self._overall = None
@@ -2313,6 +2313,9 @@ class PipelineExecutor:
                    if name in {"get-relationship",
                                "get-rel"}:
                        continue
+                    if name in {"get-metadata",
+                                "meta"}:
+                        continue
                    # `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
                    # for it because it doesn't meaningfully "complete" (mpv may keep running)
                    # and Live output interferes with MPV playlist UI.
@@ -0,0 +1,102 @@
+import re
+from typing import Any, Dict, List, Set
+
+
+def value_normalize(value: Any) -> str:  # Stringify, strip surrounding whitespace, then lowercase.
+    text = str(value).strip()
+    return text.lower() if text else ""  # "" when nothing is left after stripping
+
+
+def _add_tag(tags: List[str], namespace: str, value: str) -> None:
+    """Append "namespace:value" (value normalized) to ``tags`` in place unless already present."""
+    if not namespace or not value:  # both parts are required to form a tag
+        return
+    normalized_value = value_normalize(value)
+    if not normalized_value:  # value contained only whitespace
+        return
+    candidate = f"{namespace}:{normalized_value}"
+    if candidate not in tags:  # skip exact duplicates; existing order is kept
+        tags.append(candidate)
+
+
+def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
+    """Return de-duplicated tags built from ``entry``: namespaced field tags, then its "tags" value, then chapter titles."""
+    tags: List[str] = []
+    seen_namespaces: Set[str] = set()
+
+    # Meaningful yt-dlp fields that should become tags
+    # This mapping excludes technical fields: filesize, duration, format_id, vcodec, acodec, ext, etc.
+    field_to_namespace = {
+        "artist": "artist",
+        "album": "album",
+        "creator": "creator",
+        "uploader": "creator",  # Map uploader to creator (deduplicate)
+        "uploader_id": "creator",  # Also maps to creator; see the creator guard in the loop below
+        "channel": "channel",
+        "genre": "genre",
+        "track": "track",
+        "track_number": "track_number",
+        "release_date": "release_date",
+        "upload_date": "upload_date",
+        "title": "title",
+        "license": "license",
+        "location": "location",
+    }
+
+    # Extract simple field mappings
+    for yt_field, namespace in field_to_namespace.items():
+        value = entry.get(yt_field)
+        if value is not None:
+            value_str = value_normalize(str(value))
+            if value_str:
+                # Only the first non-empty creator-mapped field (in mapping order) yields a creator tag
+                if namespace == "creator":
+                    if "creator" in seen_namespaces:
+                        continue
+                    seen_namespaces.add("creator")
+
+                _add_tag(tags, namespace, value_str)  # _add_tag normalizes value_str again internally
+
+    # Handle tags field specially (could be list, dict, or string)
+    # For list tags, capture as freeform (no namespace prefix)
+    tags_field = entry.get("tags")
+    if tags_field is not None:
+        if isinstance(tags_field, list):
+            # Tags is list: ["tag1", "tag2", ...] → capture as freeform tags (no "tag:" prefix)
+            # These are typically genre/category tags from the source (BandCamp genres, etc.)
+            for tag_value in tags_field:
+                if tag_value:
+                    normalized = value_normalize(str(tag_value))
+                    if normalized and normalized not in tags:
+                        tags.append(normalized)
+        elif isinstance(tags_field, dict):
+            # Tags is dict: {"key": "val"} → tag:key:val
+            for key, val in tags_field.items():
+                if key and val:
+                    key_normalized = value_normalize(str(key))
+                    val_normalized = value_normalize(str(val))
+                    if key_normalized and val_normalized:
+                        _add_tag(tags, f"tag:{key_normalized}", val_normalized)
+        else:
+            # Tags is a string or any other non-list/non-dict value (stringified): split on commas and/or whitespace → freeform
+            tag_str = str(tags_field).strip()
+            if tag_str:
+                for tag_value in re.split(r'[,\s]+', tag_str):  # NOTE(review): whitespace also splits, so multi-word tags are broken into words
+                    tag_value = tag_value.strip()
+                    if tag_value:
+                        normalized = value_normalize(tag_value)
+                        if normalized and normalized not in tags:
+                            tags.append(normalized)
+
+    # Extract chapter titles as freeform (un-namespaced) tags if present
+    chapters = entry.get("chapters")
+    if chapters and isinstance(chapters, list):
+        for chapter in chapters:
+            if isinstance(chapter, dict):
+                title = chapter.get("title")
+                if title:
+                    title_norm = value_normalize(str(title))
+                    if title_norm and title_norm not in tags:
+                        tags.append(title_norm)
+
+    return tags