j
This commit is contained in:
@@ -25,7 +25,7 @@ from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, Progress
|
|||||||
from SYS.utils import ensure_directory, sha256_file
|
from SYS.utils import ensure_directory, sha256_file
|
||||||
|
|
||||||
try: # Optional; used for metadata extraction when available
|
try: # Optional; used for metadata extraction when available
|
||||||
from SYS.metadata import extract_ytdlp_tags
|
from SYS.yt_metadata import extract_ytdlp_tags
|
||||||
except Exception: # pragma: no cover - optional dependency
|
except Exception: # pragma: no cover - optional dependency
|
||||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
|
|||||||
mod = sys.modules.get(mod_name)
|
mod = sys.modules.get(mod_name)
|
||||||
if mod is None:
|
if mod is None:
|
||||||
mod = __import__(mod_name)
|
mod = __import__(mod_name)
|
||||||
except Exception:
|
except (ImportError, ModuleNotFoundError):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Common APIs that return a bundle path
|
# Common APIs that return a bundle path
|
||||||
|
|||||||
@@ -353,7 +353,7 @@
|
|||||||
"filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})"
|
"filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})"
|
||||||
],
|
],
|
||||||
"regexp": "filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})",
|
"regexp": "filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})",
|
||||||
"status": false
|
"status": true
|
||||||
},
|
},
|
||||||
"filefactory": {
|
"filefactory": {
|
||||||
"name": "filefactory",
|
"name": "filefactory",
|
||||||
@@ -622,7 +622,7 @@
|
|||||||
"(simfileshare\\.net/download/[0-9]+/)"
|
"(simfileshare\\.net/download/[0-9]+/)"
|
||||||
],
|
],
|
||||||
"regexp": "(simfileshare\\.net/download/[0-9]+/)",
|
"regexp": "(simfileshare\\.net/download/[0-9]+/)",
|
||||||
"status": false
|
"status": true
|
||||||
},
|
},
|
||||||
"streamtape": {
|
"streamtape": {
|
||||||
"name": "streamtape",
|
"name": "streamtape",
|
||||||
|
|||||||
@@ -852,7 +852,7 @@ class YtdlpMetadataProvider(MetadataProvider):
|
|||||||
|
|
||||||
tags: List[str] = []
|
tags: List[str] = []
|
||||||
try:
|
try:
|
||||||
from SYS.metadata import extract_ytdlp_tags
|
from SYS.yt_metadata import extract_ytdlp_tags
|
||||||
except Exception:
|
except Exception:
|
||||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from SYS.logger import log, debug
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||||
|
|
||||||
from API.HydrusNetwork import apply_hydrus_tag_mutation, fetch_hydrus_metadata, fetch_hydrus_metadata_by_url
|
from SYS.yt_metadata import extract_ytdlp_tags
|
||||||
|
|
||||||
try: # Optional; used when available for richer metadata fetches
|
try: # Optional; used when available for richer metadata fetches
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
@@ -918,72 +918,7 @@ def apply_tag_mutation(payload: Dict[str,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
|
||||||
""" """
|
|
||||||
tags: List[str] = []
|
|
||||||
seen_namespaces: Set[str] = set()
|
|
||||||
|
|
||||||
# Meaningful yt-dlp fields that should become tags
|
|
||||||
# This mapping excludes technical fields: filesize, duration, format_id, vcodec, acodec, ext, etc.
|
|
||||||
field_to_namespace = {
|
|
||||||
"artist": "artist",
|
|
||||||
"album": "album",
|
|
||||||
"creator": "creator",
|
|
||||||
"uploader": "creator", # Map uploader to creator (deduplicate)
|
|
||||||
"uploader_id": "creator",
|
|
||||||
"channel": "channel",
|
|
||||||
"genre": "genre",
|
|
||||||
"track": "track",
|
|
||||||
"track_number": "track_number",
|
|
||||||
"release_date": "release_date",
|
|
||||||
"upload_date": "upload_date",
|
|
||||||
"title": "title",
|
|
||||||
"license": "license",
|
|
||||||
"location": "location",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract simple field mappings
|
|
||||||
for yt_field, namespace in field_to_namespace.items():
|
|
||||||
value = entry.get(yt_field)
|
|
||||||
if value is not None:
|
|
||||||
value_str = value_normalize(str(value))
|
|
||||||
if value_str:
|
|
||||||
# Prevent duplicate creator tags (only use first creator)
|
|
||||||
if namespace == "creator":
|
|
||||||
if "creator" in seen_namespaces:
|
|
||||||
continue
|
|
||||||
seen_namespaces.add("creator")
|
|
||||||
|
|
||||||
_add_tag(tags, namespace, value_str)
|
|
||||||
|
|
||||||
# Handle tags field specially (could be list, dict, or string)
|
|
||||||
# For list/sequence tags, capture as freeform (no namespace prefix)
|
|
||||||
tags_field = entry.get("tags")
|
|
||||||
if tags_field is not None:
|
|
||||||
if isinstance(tags_field, list):
|
|
||||||
# Tags is list: ["tag1", "tag2", ...] → capture as freeform tags (no "tag:" prefix)
|
|
||||||
# These are typically genre/category tags from the source (BandCamp genres, etc.)
|
|
||||||
for tag_value in tags_field:
|
|
||||||
if tag_value:
|
|
||||||
normalized = value_normalize(str(tag_value))
|
|
||||||
if normalized and normalized not in tags:
|
|
||||||
tags.append(normalized)
|
|
||||||
elif isinstance(tags_field, dict):
|
|
||||||
# Tags is dict: {"key": "val"} → tag:key:val
|
|
||||||
for key, val in tags_field.items():
|
|
||||||
if key and val:
|
|
||||||
key_normalized = value_normalize(str(key))
|
|
||||||
val_normalized = value_normalize(str(val))
|
|
||||||
if key_normalized and val_normalized:
|
|
||||||
_add_tag(tags, f"tag:{key_normalized}", val_normalized)
|
|
||||||
else:
|
|
||||||
# Tags is string or other: add as freeform
|
|
||||||
if tags_field:
|
|
||||||
normalized = value_normalize(str(tags_field))
|
|
||||||
if normalized and normalized not in tags:
|
|
||||||
tags.append(normalized)
|
|
||||||
|
|
||||||
return tags
|
|
||||||
|
|
||||||
|
|
||||||
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import inspect
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@@ -996,16 +997,25 @@ class PipelineLiveProgress:
|
|||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
# Safe to call whether Live is running or paused.
|
# Safe to call whether Live is running or paused.
|
||||||
if self._live is not None:
|
if self._live is not None:
|
||||||
|
stop_fn = self._live.stop
|
||||||
|
has_clear = False
|
||||||
try:
|
try:
|
||||||
try:
|
signature = inspect.signature(stop_fn)
|
||||||
self._live.stop(clear=True)
|
has_clear = "clear" in signature.parameters
|
||||||
except TypeError:
|
except (ValueError, TypeError):
|
||||||
self._live.stop()
|
|
||||||
except Exception:
|
|
||||||
self._live.stop()
|
|
||||||
except Exception:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
if has_clear:
|
||||||
|
stop_fn(clear=True)
|
||||||
|
else:
|
||||||
|
stop_fn()
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
stop_fn()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
self._live = None
|
self._live = None
|
||||||
self._console = None
|
self._console = None
|
||||||
self._overall = None
|
self._overall = None
|
||||||
|
|||||||
@@ -2313,6 +2313,9 @@ class PipelineExecutor:
|
|||||||
if name in {"get-relationship",
|
if name in {"get-relationship",
|
||||||
"get-rel"}:
|
"get-rel"}:
|
||||||
continue
|
continue
|
||||||
|
if name in {"get-metadata",
|
||||||
|
"meta"}:
|
||||||
|
continue
|
||||||
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
|
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
|
||||||
# for it because it doesn't meaningfully "complete" (mpv may keep running)
|
# for it because it doesn't meaningfully "complete" (mpv may keep running)
|
||||||
# and Live output interferes with MPV playlist UI.
|
# and Live output interferes with MPV playlist UI.
|
||||||
|
|||||||
102
SYS/yt_metadata.py
Normal file
102
SYS/yt_metadata.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Set
|
||||||
|
|
||||||
|
|
||||||
|
def value_normalize(value: Any) -> str:
    """Return *value* as a trimmed, lower-cased string.

    The input is coerced with ``str()`` and surrounding whitespace is
    removed. Input that is empty after trimming yields ``""``.
    """
    stripped = str(value).strip()
    if not stripped:
        return ""
    return stripped.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_tag(tags: List[str], namespace: str, value: str) -> None:
|
||||||
|
"""Add a namespaced tag if not already present."""
|
||||||
|
if not namespace or not value:
|
||||||
|
return
|
||||||
|
normalized_value = value_normalize(value)
|
||||||
|
if not normalized_value:
|
||||||
|
return
|
||||||
|
candidate = f"{namespace}:{normalized_value}"
|
||||||
|
if candidate not in tags:
|
||||||
|
tags.append(candidate)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
    """Build a de-duplicated list of tag strings from a yt-dlp info dict.

    Three sources in *entry* are consulted, in this order:

    1. A fixed set of descriptive fields (artist, album, title, ...), each
       emitted as ``namespace:value``. ``creator``, ``uploader`` and
       ``uploader_id`` all map to the ``creator`` namespace and only the
       first non-empty one is used.
    2. The ``tags`` field. A list or tuple contributes each truthy item as
       a freeform tag; a dict contributes ``tag:key:value`` entries; any
       other non-None value is stringified and split on commas/whitespace
       into freeform tags.
    3. The ``chapters`` list, whose dict items contribute their ``title``
       as freeform tags.

    All values pass through ``value_normalize`` before being stored.

    Args:
        entry: Mapping of yt-dlp metadata fields.

    Returns:
        Tags in discovery order, without duplicates.
    """
    tags: List[str] = []

    def add_freeform(raw: str) -> None:
        # Freeform tags carry no namespace prefix; skip empties and repeats.
        normalized = value_normalize(raw)
        if normalized and normalized not in tags:
            tags.append(normalized)

    # Meaningful yt-dlp fields that should become tags.
    # Technical fields (filesize, duration, format_id, vcodec, acodec, ext,
    # ...) are deliberately absent from this mapping.
    field_to_namespace = {
        "artist": "artist",
        "album": "album",
        "creator": "creator",
        "uploader": "creator",  # Map uploader to creator (deduplicate)
        "uploader_id": "creator",
        "channel": "channel",
        "genre": "genre",
        "track": "track",
        "track_number": "track_number",
        "release_date": "release_date",
        "upload_date": "upload_date",
        "title": "title",
        "license": "license",
        "location": "location",
    }

    creator_seen = False
    for yt_field, namespace in field_to_namespace.items():
        value = entry.get(yt_field)
        if value is None:
            continue
        value_str = value_normalize(str(value))
        if not value_str:
            continue
        if namespace == "creator":
            # Several fields feed "creator"; keep only the first one found.
            if creator_seen:
                continue
            creator_seen = True
        _add_tag(tags, namespace, value_str)

    tags_field = entry.get("tags")
    if isinstance(tags_field, (list, tuple)):
        # Sequence of tags -> freeform tags. Tuples are handled here so they
        # are not stringified wholesale by the fallback branch below.
        for tag_value in tags_field:
            if tag_value:
                add_freeform(str(tag_value))
    elif isinstance(tags_field, dict):
        # {"key": "val"} -> tag:key:val
        for key, val in tags_field.items():
            if key and val:
                key_normalized = value_normalize(str(key))
                val_normalized = value_normalize(str(val))
                if key_normalized and val_normalized:
                    _add_tag(tags, f"tag:{key_normalized}", val_normalized)
    elif tags_field is not None:
        # String (or other scalar): "tag1,tag2" -> split into freeform tags.
        tag_str = str(tags_field).strip()
        if tag_str:
            for tag_value in re.split(r'[,\s]+', tag_str):
                tag_value = tag_value.strip()
                if tag_value:
                    add_freeform(tag_value)

    # Chapter titles are captured as freeform tags as well.
    chapters = entry.get("chapters")
    if chapters and isinstance(chapters, list):
        for chapter in chapters:
            if isinstance(chapter, dict):
                title = chapter.get("title")
                if title:
                    add_freeform(str(title))

    return tags
|
||||||
@@ -358,10 +358,25 @@ def _emit_tags_as_table(
|
|||||||
# Store the table and items in history so @.. works to go back
|
# Store the table and items in history so @.. works to go back
|
||||||
# Use overlay mode so it doesn't push the previous search to history stack
|
# Use overlay mode so it doesn't push the previous search to history stack
|
||||||
# This makes get-tag behave like a transient view
|
# This makes get-tag behave like a transient view
|
||||||
|
table_applied = False
|
||||||
try:
|
try:
|
||||||
ctx.set_last_result_table_overlay(table, tag_items, subject)
|
ctx.set_last_result_table_overlay(table, tag_items, subject)
|
||||||
|
table_applied = True
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
ctx.set_last_result_table(table, tag_items, subject)
|
try:
|
||||||
|
ctx.set_last_result_table(table, tag_items, subject)
|
||||||
|
table_applied = True
|
||||||
|
except Exception:
|
||||||
|
table_applied = False
|
||||||
|
except Exception:
|
||||||
|
table_applied = False
|
||||||
|
|
||||||
|
if table_applied:
|
||||||
|
try:
|
||||||
|
if hasattr(ctx, "set_current_stage_table"):
|
||||||
|
ctx.set_current_stage_table(table)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
# Note: CLI will handle displaying the table via ResultTable formatting
|
# Note: CLI will handle displaying the table via ResultTable formatting
|
||||||
|
|
||||||
|
|
||||||
@@ -776,7 +791,7 @@ def _scrape_url_metadata(
|
|||||||
import json as json_module
|
import json as json_module
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from SYS.metadata import extract_ytdlp_tags
|
from SYS.yt_metadata import extract_ytdlp_tags
|
||||||
except ImportError:
|
except ImportError:
|
||||||
extract_ytdlp_tags = None
|
extract_ytdlp_tags = None
|
||||||
|
|
||||||
@@ -1613,6 +1628,33 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
|||||||
if isinstance(result, list) and len(result) > 0:
|
if isinstance(result, list) and len(result) > 0:
|
||||||
result = result[0]
|
result = result[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
display_subject = ctx.get_last_result_subject()
|
||||||
|
except Exception:
|
||||||
|
display_subject = None
|
||||||
|
|
||||||
|
def _value_has_content(value: Any) -> bool:
|
||||||
|
if value is None:
|
||||||
|
return False
|
||||||
|
if isinstance(value, str):
|
||||||
|
return bool(value.strip())
|
||||||
|
if isinstance(value, (list, tuple, set)):
|
||||||
|
return len(value) > 0
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _resolve_subject_value(*keys: str) -> Any:
    """Return the first populated field among *keys*.

    Every key is tried on ``result`` first; only if none has content are
    the keys tried on ``display_subject`` (when it is not ``None``).
    Returns ``None`` when nothing with content is found.
    """
    sources = [result]
    if display_subject is not None:
        sources.append(display_subject)
    for source in sources:
        for key in keys:
            candidate = get_field(source, key, None)
            if _value_has_content(candidate):
                return candidate
    return None
|
||||||
|
|
||||||
# If the current result already carries a tag list (e.g. a selected metadata
|
# If the current result already carries a tag list (e.g. a selected metadata
|
||||||
# row from get-tag -scrape itunes), APPLY those tags to the file in the store.
|
# row from get-tag -scrape itunes), APPLY those tags to the file in the store.
|
||||||
result_provider = get_field(result, "provider", None)
|
result_provider = get_field(result, "provider", None)
|
||||||
@@ -1726,7 +1768,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
|||||||
)
|
)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
hash_from_result = normalize_hash(get_field(result, "hash", None))
|
hash_from_result = normalize_hash(_resolve_subject_value("hash"))
|
||||||
file_hash = hash_override or hash_from_result
|
file_hash = hash_override or hash_from_result
|
||||||
# Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline
|
# Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline
|
||||||
# This allows interactive REPL to work even in pipelines
|
# This allows interactive REPL to work even in pipelines
|
||||||
@@ -1734,7 +1776,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
|||||||
store_label = store_key.strip() if store_key and store_key.strip() else None
|
store_label = store_key.strip() if store_key and store_key.strip() else None
|
||||||
|
|
||||||
# Get hash and store from result
|
# Get hash and store from result
|
||||||
store_name = get_field(result, "store")
|
store_value = _resolve_subject_value("store")
|
||||||
|
store_name = str(store_value).strip() if store_value is not None else None
|
||||||
|
|
||||||
if not file_hash:
|
if not file_hash:
|
||||||
log("No hash available in result", file=sys.stderr)
|
log("No hash available in result", file=sys.stderr)
|
||||||
@@ -1744,6 +1787,68 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
|||||||
log("No store specified in result", file=sys.stderr)
|
log("No store specified in result", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
item_title = (
|
||||||
|
_resolve_subject_value("title", "name", "filename")
|
||||||
|
)
|
||||||
|
subject_store = store_name
|
||||||
|
subject_path_value = (
|
||||||
|
_resolve_subject_value("path", "target", "filename")
|
||||||
|
)
|
||||||
|
subject_path = None
|
||||||
|
if subject_path_value is not None:
|
||||||
|
try:
|
||||||
|
subject_path = str(subject_path_value)
|
||||||
|
except Exception:
|
||||||
|
subject_path = None
|
||||||
|
|
||||||
|
service_name = ""
|
||||||
|
subject_payload_base: Dict[str, Any] = {
|
||||||
|
"tag": [],
|
||||||
|
"title": item_title,
|
||||||
|
"name": item_title,
|
||||||
|
"store": subject_store,
|
||||||
|
"service_name": service_name,
|
||||||
|
"extra": {
|
||||||
|
"tag": [],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if file_hash:
|
||||||
|
subject_payload_base["hash"] = file_hash
|
||||||
|
if subject_path:
|
||||||
|
subject_payload_base["path"] = subject_path
|
||||||
|
|
||||||
|
def _subject_payload_with(
    tags: Sequence[str],
    service_name_override: Optional[str] = None,
) -> Dict[str, Any]:
    """Return a copy of ``subject_payload_base`` carrying *tags*.

    The copy gets fresh lists under ``"tag"`` and ``"extra"["tag"]``, so
    the base payload is not modified. ``"service_name"`` is replaced only
    when *service_name_override* is not ``None``.
    """
    payload = {
        **subject_payload_base,
        "tag": list(tags),
        "extra": {"tag": list(tags)},
    }
    if service_name_override is not None:
        payload["service_name"] = service_name_override
    return payload
|
||||||
|
|
||||||
|
raw_result_tags = get_field(result, "tag", None)
|
||||||
|
if not isinstance(raw_result_tags, list):
|
||||||
|
raw_result_tags = get_field(result, "tags", None)
|
||||||
|
display_tags: List[str] = []
|
||||||
|
if isinstance(raw_result_tags, list):
|
||||||
|
display_tags = [str(t) for t in raw_result_tags if t is not None]
|
||||||
|
if display_tags and not emit_mode:
|
||||||
|
subject_payload = _subject_payload_with(display_tags)
|
||||||
|
_emit_tags_as_table(
|
||||||
|
display_tags,
|
||||||
|
file_hash=file_hash,
|
||||||
|
store=str(subject_store),
|
||||||
|
service_name=None,
|
||||||
|
config=config,
|
||||||
|
item_title=item_title,
|
||||||
|
path=subject_path,
|
||||||
|
subject=subject_payload,
|
||||||
|
)
|
||||||
|
return 0
|
||||||
|
|
||||||
# Get tags using storage backend
|
# Get tags using storage backend
|
||||||
try:
|
try:
|
||||||
from Store import Store
|
from Store import Store
|
||||||
@@ -1761,56 +1866,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
|||||||
log(f"Failed to get tags: {exc}", file=sys.stderr)
|
log(f"Failed to get tags: {exc}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Always output to ResultTable (pipeline mode only)
|
subject_payload = _subject_payload_with(
|
||||||
# Extract title for table header
|
current,
|
||||||
item_title = (
|
service_name if source == "hydrus" else None,
|
||||||
get_field(result,
|
|
||||||
"title",
|
|
||||||
None) or get_field(result,
|
|
||||||
"name",
|
|
||||||
None) or get_field(result,
|
|
||||||
"filename",
|
|
||||||
None)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build a subject payload representing the file whose tags are being shown
|
|
||||||
subject_store = get_field(result, "store", None) or store_name
|
|
||||||
subject_path = (
|
|
||||||
get_field(result,
|
|
||||||
"path",
|
|
||||||
None) or get_field(result,
|
|
||||||
"target",
|
|
||||||
None) or get_field(result,
|
|
||||||
"filename",
|
|
||||||
None)
|
|
||||||
)
|
|
||||||
subject_payload: Dict[str,
|
|
||||||
Any] = {
|
|
||||||
"tag": list(current),
|
|
||||||
"title": item_title,
|
|
||||||
"name": item_title,
|
|
||||||
"store": subject_store,
|
|
||||||
"service_name": service_name,
|
|
||||||
"extra": {
|
|
||||||
"tag": list(current),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if file_hash:
|
|
||||||
subject_payload["hash"] = file_hash
|
|
||||||
if subject_path:
|
|
||||||
try:
|
|
||||||
subject_payload["path"] = str(subject_path)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
_emit_tags_as_table(
|
_emit_tags_as_table(
|
||||||
current,
|
current,
|
||||||
file_hash=file_hash,
|
file_hash=file_hash,
|
||||||
store=subject_store,
|
store=str(subject_store),
|
||||||
service_name=service_name if source == "hydrus" else None,
|
service_name=service_name if source == "hydrus" else None,
|
||||||
config=config,
|
config=config,
|
||||||
item_title=item_title,
|
item_title=item_title,
|
||||||
path=str(subject_path) if subject_path else None,
|
path=subject_path,
|
||||||
subject=subject_payload,
|
subject=subject_payload,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
125
tool/ytdlp.py
125
tool/ytdlp.py
@@ -27,7 +27,7 @@ from SYS.models import (
|
|||||||
)
|
)
|
||||||
from SYS.pipeline_progress import PipelineProgress
|
from SYS.pipeline_progress import PipelineProgress
|
||||||
from SYS.utils import ensure_directory, sha256_file
|
from SYS.utils import ensure_directory, sha256_file
|
||||||
from SYS.metadata import extract_ytdlp_tags
|
from SYS.yt_metadata import extract_ytdlp_tags
|
||||||
|
|
||||||
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
|
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
@@ -44,6 +44,96 @@ else:
|
|||||||
|
|
||||||
_EXTRACTOR_CACHE: List[Any] | None = None
|
_EXTRACTOR_CACHE: List[Any] | None = None
|
||||||
|
|
||||||
|
# Patterns for domain extraction from yt-dlp regexes
|
||||||
|
# 1) Alternation group followed by \.tld e.g. (?:youtube|youtu|youtube-nocookie)\.com
|
||||||
|
ALT_GROUP_TLD = re.compile(r'\((?:\?:)?([^\)]+)\)\\\.(?P<tld>[A-Za-z0-9.+-]+)')
|
||||||
|
# 2) Literal domain pieces like youtube\.com or youtu\.be (not preceded by a group)
|
||||||
|
LITERAL_DOMAIN = re.compile(r'(?<!\()(?<!\|)(?<!:)([A-Za-z0-9][A-Za-z0-9_-]{0,})\\\.([A-Za-z0-9.+-]+)')
|
||||||
|
# 3) Partial domain tokens that appear alone (e.g., zhihu) — treat as zhihu.com fallback
|
||||||
|
PARTIAL_TOKEN = re.compile(r'(?<![A-Za-z0-9_-])([A-Za-z0-9][A-Za-z0-9_-]{1,})(?=(?:\\?[/\)\$]|\\\.|$))')
|
||||||
|
|
||||||
|
_SUPPORTED_DOMAINS: set[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_patterns(valid_url) -> List[str]:
    """Coerce *valid_url* into a list of pattern strings.

    A non-empty string becomes a one-element list. A list or tuple is
    filtered down to its string items. Any falsy value or other type
    yields an empty list.
    """
    if not valid_url:
        return []
    patterns: List[str] = []
    if isinstance(valid_url, str):
        patterns.append(valid_url)
    elif isinstance(valid_url, (list, tuple)):
        patterns.extend(item for item in valid_url if isinstance(item, str))
    return patterns
|
||||||
|
|
||||||
|
|
||||||
|
def extract_from_pattern(pat: str) -> set[str]:
    """Collect candidate domain names from one URL regex string.

    Matches of ``ALT_GROUP_TLD`` and ``LITERAL_DOMAIN`` are gathered first.
    Only when both find nothing are ``PARTIAL_TOKEN`` matches used, each
    given a ``.com`` suffix. All results are lower-cased.
    """
    found: set[str] = set()

    # 1) Alternation groups followed by .tld, e.g. "youtube|youtu" + "com".
    for alternatives, suffix in ALT_GROUP_TLD.findall(pat):
        for piece in alternatives.split('|'):
            # Drop an embedded optional "www." group from the alternative.
            piece = re.sub(r'\(\?:www\\\.\)\?', '', piece.strip())
            if piece:
                found.add(f"{piece}.{suffix}".lower())

    # 2) Literal name\.tld pairs.
    found.update(
        f"{name}.{tld}".lower() for name, tld in LITERAL_DOMAIN.findall(pat)
    )

    if found:
        return found

    # 3) Fallback: bare tokens, skipping short ones and scheme/TLD words.
    ignored = {"https", "http", "www", "com", "net", "org"}
    for token in PARTIAL_TOKEN.findall(pat):
        lowered = token.lower()
        if len(token) > 2 and lowered not in ignored:
            found.add(f"{lowered}.com")

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def extract_domains(valid_url) -> set[str]:
    """Return the domains found across every pattern in *valid_url*.

    *valid_url* is normalized with ``normalize_patterns`` and each pattern
    is run through ``extract_from_pattern``. Domains whose last two labels
    are identical (e.g. ``com.com``) are dropped as junk.
    """
    collected: set[str] = set()
    for pattern in normalize_patterns(valid_url):
        collected.update(extract_from_pattern(pattern))

    def _has_repeated_tail(domain: str) -> bool:
        labels = domain.split('.')
        return len(labels) >= 2 and labels[-2] == labels[-1]

    return {domain for domain in collected if not _has_repeated_tail(domain)}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_supported_domains() -> set[str]:
    """Return the module-level set of supported domains, building it once.

    On the first call the set is filled from the ``_VALID_URL`` of every
    extractor yielded by ``gen_extractors()``, skipping the one whose
    ``IE_NAME`` is ``"generic"``. Later calls return the same set. The set
    stays empty when ``gen_extractors`` is ``None``.
    """
    global _SUPPORTED_DOMAINS
    if _SUPPORTED_DOMAINS is None:
        _SUPPORTED_DOMAINS = set()
        if gen_extractors is not None:
            try:
                for extractor in gen_extractors():
                    if getattr(extractor, "IE_NAME", "").lower() == "generic":
                        continue
                    pattern = getattr(extractor, "_VALID_URL", None)
                    _SUPPORTED_DOMAINS.update(extract_domains(pattern))
            except Exception:
                # Best effort: an error ends the scan; domains gathered so
                # far are kept.
                pass
    return _SUPPORTED_DOMAINS
|
||||||
|
|
||||||
|
|
||||||
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
|
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
|
||||||
cur: Any = config
|
cur: Any = config
|
||||||
@@ -122,17 +212,14 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for extractor in _get_extractors():
|
parsed = urlparse(url)
|
||||||
try:
|
domain = parsed.netloc.lower()
|
||||||
if not extractor.suitable(url):
|
if not domain:
|
||||||
continue
|
return False
|
||||||
except Exception:
|
supported = _build_supported_domains()
|
||||||
continue
|
for base in supported:
|
||||||
|
if domain == base or domain.endswith("." + base):
|
||||||
name = getattr(extractor, "IE_NAME", "").lower()
|
return True
|
||||||
if name == "generic":
|
|
||||||
continue
|
|
||||||
return True
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -593,16 +680,22 @@ class YtDlpTool:
|
|||||||
# Defaulting to 'chrome' as the most common path.
|
# Defaulting to 'chrome' as the most common path.
|
||||||
base_options["cookiesfrombrowser"] = "chrome"
|
base_options["cookiesfrombrowser"] = "chrome"
|
||||||
|
|
||||||
|
# Special handling for format keywords
|
||||||
|
if opts.ytdl_format == "audio":
|
||||||
|
opts = opts._replace(mode="audio", ytdl_format=None)
|
||||||
|
elif opts.ytdl_format == "video":
|
||||||
|
opts = opts._replace(mode="video", ytdl_format=None)
|
||||||
|
|
||||||
if opts.no_playlist:
|
if opts.no_playlist:
|
||||||
base_options["noplaylist"] = True
|
base_options["noplaylist"] = True
|
||||||
|
|
||||||
fmt = opts.ytdl_format or self.default_format(opts.mode)
|
fmt = opts.ytdl_format or self.default_format(opts.mode)
|
||||||
base_options["format"] = fmt
|
base_options["format"] = fmt
|
||||||
|
|
||||||
# if opts.mode == "audio":
|
if opts.mode == "audio":
|
||||||
# base_options["postprocessors"] = [{
|
base_options["postprocessors"] = [{
|
||||||
# "key": "FFmpegExtractAudio"
|
"key": "FFmpegExtractAudio"
|
||||||
# }]
|
}]
|
||||||
|
|
||||||
if opts.mode != "audio":
|
if opts.mode != "audio":
|
||||||
format_sort = self.defaults.format_sort or [
|
format_sort = self.defaults.format_sort or [
|
||||||
|
|||||||
Reference in New Issue
Block a user