d
This commit is contained in:
@@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str],
|
||||
result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
|
||||
# result = {"path": "value1", "count": "5"}
|
||||
"""
|
||||
try:
|
||||
from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
|
||||
|
||||
return _parse_cmdlet_args_fast(args, cmdlet_spec)
|
||||
except Exception:
|
||||
# Fall back to local implementation below to preserve behavior if the
|
||||
# lightweight parser is unavailable.
|
||||
pass
|
||||
|
||||
result: Dict[str,
|
||||
Any] = {}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from SYS import pipeline as ctx
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from . import _shared as sh
|
||||
from SYS.field_access import get_field
|
||||
|
||||
normalize_hash = sh.normalize_hash
|
||||
looks_like_hash = sh.looks_like_hash
|
||||
@@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet
|
||||
CmdletArg = sh.CmdletArg
|
||||
SharedArgs = sh.SharedArgs
|
||||
parse_cmdlet_args = sh.parse_cmdlet_args
|
||||
get_field = sh.get_field
|
||||
|
||||
try:
|
||||
from SYS.metadata import extract_title
|
||||
@@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
|
||||
return out
|
||||
|
||||
|
||||
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
|
||||
"""Extract subtitle availability tags from a yt-dlp info dict.
|
||||
|
||||
Produces multi-valued tags so languages can coexist:
|
||||
- subs:<lang>
|
||||
- subs_auto:<lang>
|
||||
"""
|
||||
|
||||
def _langs(value: Any) -> List[str]:
|
||||
if not isinstance(value, dict):
|
||||
return []
|
||||
langs: List[str] = []
|
||||
for k in value.keys():
|
||||
if not isinstance(k, str):
|
||||
continue
|
||||
lang = k.strip().lower()
|
||||
if lang:
|
||||
langs.append(lang)
|
||||
return sorted(set(langs))
|
||||
|
||||
out: List[str] = []
|
||||
for lang in _langs(info.get("subtitles")):
|
||||
out.append(f"subs:{lang}")
|
||||
for lang in _langs(info.get("automatic_captions")):
|
||||
out.append(f"subs_auto:{lang}")
|
||||
return out
|
||||
|
||||
|
||||
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
|
||||
"""Fetch a yt-dlp info dict without downloading media."""
|
||||
if not isinstance(url, str) or not url.strip():
|
||||
return None
|
||||
url = url.strip()
|
||||
|
||||
# Prefer the Python module when available (faster, avoids shell quoting issues).
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
|
||||
opts: Any = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"skip_download": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 15,
|
||||
"retries": 1,
|
||||
"playlist_items": "1-10",
|
||||
}
|
||||
with yt_dlp.YoutubeDL(opts) as ydl:
|
||||
info = ydl.extract_info(url, download=False)
|
||||
return info if isinstance(info, dict) else None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to yt-dlp CLI if the module isn't available.
|
||||
try:
|
||||
import json as json_module
|
||||
|
||||
cmd = [
|
||||
"yt-dlp",
|
||||
"-J",
|
||||
"--no-warnings",
|
||||
"--skip-download",
|
||||
"--playlist-items",
|
||||
"1-10",
|
||||
url,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
payload = (result.stdout or "").strip()
|
||||
if not payload:
|
||||
return None
|
||||
data = json_module.loads(payload)
|
||||
return data if isinstance(data, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_candidate_urls_for_item(
|
||||
result: Any,
|
||||
backend: Any,
|
||||
@@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
)
|
||||
return 1
|
||||
|
||||
info = _scrape_ytdlp_info(scrape_target)
|
||||
if not info:
|
||||
log(
|
||||
"yt-dlp could not extract metadata for this URL (unsupported or failed)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
ytdlp_provider = get_metadata_provider("ytdlp", config)
|
||||
if ytdlp_provider is None:
|
||||
log("yt-dlp metadata provider is unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
tags = [
|
||||
str(t)
|
||||
for t in ytdlp_provider.search_tags(scrape_target, limit=1)
|
||||
if t is not None
|
||||
]
|
||||
except Exception:
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
# Prefer the top-level metadata, but if this is a playlist container, use
|
||||
# the first entry for per-item fields like subtitles.
|
||||
info_for_subs = info
|
||||
entries = info.get("entries") if isinstance(info, dict) else None
|
||||
if isinstance(entries, list) and entries:
|
||||
first = entries[0]
|
||||
if isinstance(first, dict):
|
||||
info_for_subs = first
|
||||
|
||||
tags: List[str] = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags.extend(extract_ytdlp_tags(info))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Subtitle availability tags
|
||||
try:
|
||||
tags.extend(
|
||||
_extract_subtitle_tags(
|
||||
info_for_subs if isinstance(info_for_subs,
|
||||
dict) else {}
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
tags = []
|
||||
|
||||
# Ensure we actually have something to apply.
|
||||
tags = _dedup_tags_preserve_order(tags)
|
||||
@@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
identifiers = _extract_scrapable_identifiers(identifier_tags)
|
||||
identifier_query: Optional[str] = None
|
||||
if identifiers:
|
||||
if provider.name in {"openlibrary",
|
||||
"googlebooks",
|
||||
"google"}:
|
||||
identifier_query = (
|
||||
identifiers.get("isbn_13") or identifiers.get("isbn_10")
|
||||
or identifiers.get("isbn") or identifiers.get("openlibrary")
|
||||
)
|
||||
elif provider.name == "imdb":
|
||||
identifier_query = identifiers.get("imdb")
|
||||
elif provider.name == "itunes":
|
||||
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
|
||||
"musicbrainzalbum"
|
||||
)
|
||||
try:
|
||||
identifier_query = provider.identifier_query(identifiers)
|
||||
except Exception:
|
||||
identifier_query = None
|
||||
|
||||
# Determine query from identifier first, else title on the result or filename
|
||||
title_hint = (
|
||||
@@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
artist_hint = str(meta_artist)
|
||||
|
||||
combined_query: Optional[str] = None
|
||||
if (not identifier_query and title_hint and artist_hint
|
||||
and provider.name in {"itunes",
|
||||
"musicbrainz"}):
|
||||
if provider.name == "musicbrainz":
|
||||
combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"'
|
||||
else:
|
||||
combined_query = f"{title_hint} {artist_hint}"
|
||||
if not identifier_query and title_hint and artist_hint:
|
||||
try:
|
||||
combined_query = provider.combined_query(
|
||||
title_hint=str(title_hint),
|
||||
artist_hint=str(artist_hint),
|
||||
)
|
||||
except Exception:
|
||||
combined_query = None
|
||||
|
||||
# yt-dlp isn't a search provider; it requires a URL.
|
||||
url_hint: Optional[str] = None
|
||||
if provider.name == "ytdlp":
|
||||
raw_url = (
|
||||
get_field(result,
|
||||
"url",
|
||||
None) or get_field(result,
|
||||
"source_url",
|
||||
None) or get_field(result,
|
||||
"target",
|
||||
None)
|
||||
)
|
||||
if isinstance(raw_url, list) and raw_url:
|
||||
raw_url = raw_url[0]
|
||||
if isinstance(raw_url,
|
||||
str) and raw_url.strip().startswith(("http://",
|
||||
"https://")):
|
||||
url_hint = raw_url.strip()
|
||||
try:
|
||||
url_hint = provider.extract_url_query(result, get_field)
|
||||
except Exception:
|
||||
url_hint = None
|
||||
|
||||
query_hint = url_hint or identifier_query or combined_query or title_hint
|
||||
if not query_hint:
|
||||
@@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 1
|
||||
|
||||
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
|
||||
if provider.name == "ytdlp":
|
||||
emit_direct = False
|
||||
try:
|
||||
emit_direct = bool(provider.emits_direct_tags())
|
||||
except Exception:
|
||||
emit_direct = False
|
||||
if emit_direct:
|
||||
try:
|
||||
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user