This commit is contained in:
2026-01-20 16:42:49 -08:00
parent 1e2054189b
commit 922b649e17
9 changed files with 351 additions and 141 deletions

View File

@@ -7,7 +7,7 @@ from SYS.logger import log, debug
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from API.HydrusNetwork import apply_hydrus_tag_mutation, fetch_hydrus_metadata, fetch_hydrus_metadata_by_url
from SYS.yt_metadata import extract_ytdlp_tags
try: # Optional; used when available for richer metadata fetches
import yt_dlp
@@ -918,72 +918,7 @@ def apply_tag_mutation(payload: Dict[str,
}
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
    """Build a de-duplicated tag list from a yt-dlp info dict.

    Descriptive fields (artist, album, title, ...) become ``namespace:value``
    tags via ``_add_tag``. The ``tags`` field is handled by shape:

    * list/tuple -> each item becomes a freeform tag (no namespace prefix)
    * dict       -> each pair becomes ``tag:<key>:<value>``
    * other      -> the stringified value becomes a single freeform tag

    Args:
        entry: Metadata mapping as produced by yt-dlp. ``None`` or an empty
            mapping yields an empty list.

    Returns:
        Tags in insertion order, without duplicates. Values are passed
        through ``value_normalize`` before being stored.
    """
    tags: List[str] = []
    if not entry:
        # Nothing to read; avoids AttributeError when extraction returned None.
        return tags

    def add_freeform(raw: Any) -> None:
        """Append *raw* as a bare tag unless it is empty or already present."""
        if not raw:
            return
        normalized = value_normalize(str(raw))
        if normalized and normalized not in tags:
            tags.append(normalized)

    # Meaningful yt-dlp fields that should become tags.
    # Technical fields (filesize, duration, format_id, vcodec, acodec, ext,
    # etc.) are deliberately left out of this mapping.
    field_to_namespace = {
        "artist": "artist",
        "album": "album",
        "creator": "creator",
        "uploader": "creator",  # Map uploader to creator (deduplicate)
        "uploader_id": "creator",
        "channel": "channel",
        "genre": "genre",
        "track": "track",
        "track_number": "track_number",
        "release_date": "release_date",
        "upload_date": "upload_date",
        "title": "title",
        "license": "license",
        "location": "location",
    }

    # Several source fields map to "creator"; only the first non-empty one
    # (in mapping order) is kept.
    creator_added = False
    for yt_field, namespace in field_to_namespace.items():
        value = entry.get(yt_field)
        if value is None:
            continue
        value_str = value_normalize(str(value))
        if not value_str:
            continue
        if namespace == "creator":
            if creator_added:
                continue
            creator_added = True
        _add_tag(tags, namespace, value_str)

    tags_field = entry.get("tags")
    if isinstance(tags_field, (list, tuple)):
        # Sequence of tags: keep each as a freeform tag (no "tag:" prefix).
        # Tuples are accepted too so they are not stringified wholesale.
        for tag_value in tags_field:
            add_freeform(tag_value)
    elif isinstance(tags_field, dict):
        # Mapping of tags: {"key": "val"} -> tag:key:val
        for key, val in tags_field.items():
            if key and val:
                key_normalized = value_normalize(str(key))
                val_normalized = value_normalize(str(val))
                if key_normalized and val_normalized:
                    _add_tag(tags, f"tag:{key_normalized}", val_normalized)
    elif tags_field is not None:
        # String or any other scalar: keep as one freeform tag.
        add_freeform(tags_field)

    return tags
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:

View File

@@ -2,6 +2,7 @@
import datetime
import hashlib
import inspect
import json
import os
import shutil
@@ -996,16 +997,25 @@ class PipelineLiveProgress:
def stop(self) -> None:
# Safe to call whether Live is running or paused.
if self._live is not None:
stop_fn = self._live.stop
has_clear = False
try:
try:
self._live.stop(clear=True)
except TypeError:
self._live.stop()
except Exception:
self._live.stop()
except Exception:
signature = inspect.signature(stop_fn)
has_clear = "clear" in signature.parameters
except (ValueError, TypeError):
pass
try:
if has_clear:
stop_fn(clear=True)
else:
stop_fn()
except Exception:
try:
stop_fn()
except Exception:
pass
self._live = None
self._console = None
self._overall = None

View File

@@ -2313,6 +2313,9 @@ class PipelineExecutor:
if name in {"get-relationship",
"get-rel"}:
continue
if name in {"get-metadata",
"meta"}:
continue
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
# for it because it doesn't meaningfully "complete" (mpv may keep running)
# and Live output interferes with MPV playlist UI.

102
SYS/yt_metadata.py Normal file
View File

@@ -0,0 +1,102 @@
import re
from typing import Any, Dict, List, Set
def value_normalize(value: Any) -> str:
    """Return *value* as a trimmed, lower-cased string.

    The value is converted with ``str()`` first; if nothing remains after
    stripping surrounding whitespace, the empty string is returned.
    """
    stripped = str(value).strip()
    if not stripped:
        return ""
    return stripped.lower()
def _add_tag(tags: List[str], namespace: str, value: str) -> None:
    """Append ``<namespace>:<value>`` to *tags* in place.

    The value is run through ``value_normalize`` before the tag is built;
    the namespace is used as given. Nothing is appended when the namespace
    or the (normalized) value is empty, or when the resulting tag is already
    in the list.
    """
    if not (namespace and value):
        return
    cleaned = value_normalize(value)
    if cleaned:
        tag = f"{namespace}:{cleaned}"
        if tag not in tags:
            tags.append(tag)
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
    """Build a de-duplicated tag list from a yt-dlp info dict.

    Descriptive fields (artist, album, title, ...) become ``namespace:value``
    tags via ``_add_tag``. The ``tags`` field is handled by shape:

    * list/tuple -> each item becomes a freeform tag (no namespace prefix)
    * dict       -> each pair becomes ``tag:<key>:<value>``
    * other      -> stringified, split on commas/whitespace, and each piece
      becomes a freeform tag

    Chapter titles (``entry["chapters"][i]["title"]``) are appended as
    freeform tags as well.

    Args:
        entry: Metadata mapping as produced by yt-dlp. ``None`` or an empty
            mapping yields an empty list.

    Returns:
        Tags in insertion order, without duplicates. Values are normalized
        with ``value_normalize`` before being stored.
    """
    tags: List[str] = []
    if not entry:
        # Nothing to read; avoids AttributeError when extraction returned None.
        return tags

    def add_freeform(raw: Any) -> None:
        """Append *raw* as a bare tag unless it is empty or already present."""
        if not raw:
            return
        normalized = value_normalize(str(raw))
        if normalized and normalized not in tags:
            tags.append(normalized)

    # Meaningful yt-dlp fields that should become tags.
    # Technical fields (filesize, duration, format_id, vcodec, acodec, ext,
    # etc.) are deliberately left out of this mapping.
    field_to_namespace = {
        "artist": "artist",
        "album": "album",
        "creator": "creator",
        "uploader": "creator",  # Map uploader to creator (deduplicate)
        "uploader_id": "creator",
        "channel": "channel",
        "genre": "genre",
        "track": "track",
        "track_number": "track_number",
        "release_date": "release_date",
        "upload_date": "upload_date",
        "title": "title",
        "license": "license",
        "location": "location",
    }

    # Several source fields map to "creator"; only the first non-empty one
    # (in mapping order) is kept.
    creator_added = False
    for yt_field, namespace in field_to_namespace.items():
        value = entry.get(yt_field)
        if value is None:
            continue
        value_str = value_normalize(str(value))
        if not value_str:
            continue
        if namespace == "creator":
            if creator_added:
                continue
            creator_added = True
        _add_tag(tags, namespace, value_str)

    tags_field = entry.get("tags")
    if isinstance(tags_field, (list, tuple)):
        # Sequence of tags: keep each as a freeform tag (no "tag:" prefix).
        # Tuples are accepted too; otherwise they would fall through to the
        # string branch and be split into fragments of their repr.
        for tag_value in tags_field:
            add_freeform(tag_value)
    elif isinstance(tags_field, dict):
        # Mapping of tags: {"key": "val"} -> tag:key:val
        for key, val in tags_field.items():
            if key and val:
                key_normalized = value_normalize(str(key))
                val_normalized = value_normalize(str(val))
                if key_normalized and val_normalized:
                    _add_tag(tags, f"tag:{key_normalized}", val_normalized)
    elif tags_field is not None:
        # String (or other scalar): "tag1,tag2" -> split on commas and
        # whitespace; empty pieces are dropped by add_freeform.
        for tag_value in re.split(r'[,\s]+', str(tags_field).strip()):
            add_freeform(tag_value)

    # Chapter titles are captured as freeform tags when chapters is a list
    # of dicts; anything else is ignored.
    chapters = entry.get("chapters")
    if chapters and isinstance(chapters, list):
        for chapter in chapters:
            if isinstance(chapter, dict):
                add_freeform(chapter.get("title"))

    return tags