d

2026-02-11 19:06:38 -08:00
parent 1d0de1118b
commit ba623cb992
20 changed files with 847 additions and 246 deletions
@@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str],
            result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
            # result = {"path": "value1", "count": "5"}
    """
+    try:
+        from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
+
+        return _parse_cmdlet_args_fast(args, cmdlet_spec)
+    except Exception:
+        # Fall back to the local implementation below to preserve behavior if
+        # the lightweight parser is unavailable or fails.
+        pass
+
    result: Dict[str,
                 Any] = {}

@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 from SYS import pipeline as ctx
 from SYS.pipeline_progress import PipelineProgress
 from . import _shared as sh
+from SYS.field_access import get_field

 normalize_hash = sh.normalize_hash
 looks_like_hash = sh.looks_like_hash
@@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
 SharedArgs = sh.SharedArgs
 parse_cmdlet_args = sh.parse_cmdlet_args
-get_field = sh.get_field

 try:
    from SYS.metadata import extract_title
@@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
    return out


-def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
-    """Extract subtitle availability tags from a yt-dlp info dict.
-
-    Produces multi-valued tags so languages can coexist:
-    - subs:<lang>
-    - subs_auto:<lang>
-    """
-
-    def _langs(value: Any) -> List[str]:
-        if not isinstance(value, dict):
-            return []
-        langs: List[str] = []
-        for k in value.keys():
-            if not isinstance(k, str):
-                continue
-            lang = k.strip().lower()
-            if lang:
-                langs.append(lang)
-        return sorted(set(langs))
-
-    out: List[str] = []
-    for lang in _langs(info.get("subtitles")):
-        out.append(f"subs:{lang}")
-    for lang in _langs(info.get("automatic_captions")):
-        out.append(f"subs_auto:{lang}")
-    return out
-
-
-def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
-    """Fetch a yt-dlp info dict without downloading media."""
-    if not isinstance(url, str) or not url.strip():
-        return None
-    url = url.strip()
-
-    # Prefer the Python module when available (faster, avoids shell quoting issues).
-    try:
-        import yt_dlp  # type: ignore
-
-        opts: Any = {
-            "quiet": True,
-            "no_warnings": True,
-            "skip_download": True,
-            "noprogress": True,
-            "socket_timeout": 15,
-            "retries": 1,
-            "playlist_items": "1-10",
-        }
-        with yt_dlp.YoutubeDL(opts) as ydl:
-            info = ydl.extract_info(url, download=False)
-            return info if isinstance(info, dict) else None
-    except Exception:
-        pass
-
-    # Fallback to yt-dlp CLI if the module isn't available.
-    try:
-        import json as json_module
-
-        cmd = [
-            "yt-dlp",
-            "-J",
-            "--no-warnings",
-            "--skip-download",
-            "--playlist-items",
-            "1-10",
-            url,
-        ]
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
-        if result.returncode != 0:
-            return None
-        payload = (result.stdout or "").strip()
-        if not payload:
-            return None
-        data = json_module.loads(payload)
-        return data if isinstance(data, dict) else None
-    except Exception:
-        return None
-
-
 def _resolve_candidate_urls_for_item(
    result: Any,
    backend: Any,
@@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                    )
                    return 1

-                info = _scrape_ytdlp_info(scrape_target)
-                if not info:
-                    log(
-                        "yt-dlp could not extract metadata for this URL (unsupported or failed)",
-                        file=sys.stderr,
-                    )
+                ytdlp_provider = get_metadata_provider("ytdlp", config)
+                if ytdlp_provider is None:
+                    log("yt-dlp metadata provider is unavailable", file=sys.stderr)
                    return 1

                try:
-                    from SYS.metadata import extract_ytdlp_tags
+                    tags = [
+                        str(t)
+                        for t in ytdlp_provider.search_tags(scrape_target, limit=1)
+                        if t is not None
+                    ]
                except Exception:
-                    extract_ytdlp_tags = None  # type: ignore[assignment]
-
-                # Prefer the top-level metadata, but if this is a playlist container, use
-                # the first entry for per-item fields like subtitles.
-                info_for_subs = info
-                entries = info.get("entries") if isinstance(info, dict) else None
-                if isinstance(entries, list) and entries:
-                    first = entries[0]
-                    if isinstance(first, dict):
-                        info_for_subs = first
-
-                tags: List[str] = []
-                if extract_ytdlp_tags:
-                    try:
-                        tags.extend(extract_ytdlp_tags(info))
-                    except Exception:
-                        pass
-
-                # Subtitle availability tags
-                try:
-                    tags.extend(
-                        _extract_subtitle_tags(
-                            info_for_subs if isinstance(info_for_subs,
-                                                        dict) else {}
-                        )
-                    )
-                except Exception:
-                    pass
+                    tags = []

                # Ensure we actually have something to apply.
                tags = _dedup_tags_preserve_order(tags)
@@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        identifiers = _extract_scrapable_identifiers(identifier_tags)
        identifier_query: Optional[str] = None
        if identifiers:
-            if provider.name in {"openlibrary",
-                                 "googlebooks",
-                                 "google"}:
-                identifier_query = (
-                    identifiers.get("isbn_13") or identifiers.get("isbn_10")
-                    or identifiers.get("isbn") or identifiers.get("openlibrary")
-                )
-            elif provider.name == "imdb":
-                identifier_query = identifiers.get("imdb")
-            elif provider.name == "itunes":
-                identifier_query = identifiers.get("musicbrainz") or identifiers.get(
-                    "musicbrainzalbum"
-                )
+            try:
+                identifier_query = provider.identifier_query(identifiers)
+            except Exception:
+                identifier_query = None

        # Determine query from identifier first, else title on the result or filename
        title_hint = (
@@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                    artist_hint = str(meta_artist)

        combined_query: Optional[str] = None
-        if (not identifier_query and title_hint and artist_hint
-                and provider.name in {"itunes",
-                                      "musicbrainz"}):
-            if provider.name == "musicbrainz":
-                combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"'
-            else:
-                combined_query = f"{title_hint} {artist_hint}"
+        if not identifier_query and title_hint and artist_hint:
+            try:
+                combined_query = provider.combined_query(
+                    title_hint=str(title_hint),
+                    artist_hint=str(artist_hint),
+                )
+            except Exception:
+                combined_query = None

        # yt-dlp isn't a search provider; it requires a URL.
        url_hint: Optional[str] = None
-        if provider.name == "ytdlp":
-            raw_url = (
-                get_field(result,
-                          "url",
-                          None) or get_field(result,
-                                             "source_url",
-                                             None) or get_field(result,
-                                                                "target",
-                                                                None)
-            )
-            if isinstance(raw_url, list) and raw_url:
-                raw_url = raw_url[0]
-            if isinstance(raw_url,
-                          str) and raw_url.strip().startswith(("http://",
-                                                               "https://")):
-                url_hint = raw_url.strip()
+        try:
+            url_hint = provider.extract_url_query(result, get_field)
+        except Exception:
+            url_hint = None

        query_hint = url_hint or identifier_query or combined_query or title_hint
        if not query_hint:
@@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
            return 1

        # For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
-        if provider.name == "ytdlp":
+        emit_direct = False
+        try:
+            emit_direct = bool(provider.emits_direct_tags())
+        except Exception:
+            emit_direct = False
+        if emit_direct:
            try:
                tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
            except Exception: