This commit is contained in:
2026-02-11 19:06:38 -08:00
parent 1d0de1118b
commit ba623cb992
20 changed files with 848 additions and 247 deletions

View File

@@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str],
result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
# result = {"path": "value1", "count": "5"}
"""
try:
from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
return _parse_cmdlet_args_fast(args, cmdlet_spec)
except Exception:
# Fall back to local implementation below to preserve behavior if the
# lightweight parser is unavailable.
pass
result: Dict[str,
Any] = {}

View File

@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
from SYS import pipeline as ctx
from SYS.pipeline_progress import PipelineProgress
from . import _shared as sh
from SYS.field_access import get_field
normalize_hash = sh.normalize_hash
looks_like_hash = sh.looks_like_hash
@@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
try:
from SYS.metadata import extract_title
@@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
return out
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
def _resolve_candidate_urls_for_item(
result: Any,
backend: Any,
@@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
return 1
info = _scrape_ytdlp_info(scrape_target)
if not info:
log(
"yt-dlp could not extract metadata for this URL (unsupported or failed)",
file=sys.stderr,
)
ytdlp_provider = get_metadata_provider("ytdlp", config)
if ytdlp_provider is None:
log("yt-dlp metadata provider is unavailable", file=sys.stderr)
return 1
try:
from SYS.metadata import extract_ytdlp_tags
tags = [
str(t)
for t in ytdlp_provider.search_tags(scrape_target, limit=1)
if t is not None
]
except Exception:
extract_ytdlp_tags = None # type: ignore[assignment]
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(
_extract_subtitle_tags(
info_for_subs if isinstance(info_for_subs,
dict) else {}
)
)
except Exception:
pass
tags = []
# Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags)
@@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifiers = _extract_scrapable_identifiers(identifier_tags)
identifier_query: Optional[str] = None
if identifiers:
if provider.name in {"openlibrary",
"googlebooks",
"google"}:
identifier_query = (
identifiers.get("isbn_13") or identifiers.get("isbn_10")
or identifiers.get("isbn") or identifiers.get("openlibrary")
)
elif provider.name == "imdb":
identifier_query = identifiers.get("imdb")
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
"musicbrainzalbum"
)
try:
identifier_query = provider.identifier_query(identifiers)
except Exception:
identifier_query = None
# Determine query from identifier first, else title on the result or filename
title_hint = (
@@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
artist_hint = str(meta_artist)
combined_query: Optional[str] = None
if (not identifier_query and title_hint and artist_hint
and provider.name in {"itunes",
"musicbrainz"}):
if provider.name == "musicbrainz":
combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"'
else:
combined_query = f"{title_hint} {artist_hint}"
if not identifier_query and title_hint and artist_hint:
try:
combined_query = provider.combined_query(
title_hint=str(title_hint),
artist_hint=str(artist_hint),
)
except Exception:
combined_query = None
# yt-dlp isn't a search provider; it requires a URL.
url_hint: Optional[str] = None
if provider.name == "ytdlp":
raw_url = (
get_field(result,
"url",
None) or get_field(result,
"source_url",
None) or get_field(result,
"target",
None)
)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url,
str) and raw_url.strip().startswith(("http://",
"https://")):
url_hint = raw_url.strip()
try:
url_hint = provider.extract_url_query(result, get_field)
except Exception:
url_hint = None
query_hint = url_hint or identifier_query or combined_query or title_hint
if not query_hint:
@@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp":
emit_direct = False
try:
emit_direct = bool(provider.emits_direct_tags())
except Exception:
emit_direct = False
if emit_direct:
try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception: