This commit is contained in:
nose
2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions

View File

@@ -244,6 +244,8 @@ class HTTPClient:
self,
method: str,
url: str,
raise_for_status: bool = True,
log_http_errors: bool = True,
**kwargs
) -> httpx.Response:
"""
@@ -273,6 +275,7 @@ class HTTPClient:
for attempt in range(self.retries):
try:
response = self._client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except httpx.TimeoutException as e:
@@ -287,6 +290,7 @@ class HTTPClient:
response_text = e.response.text[:500]
except:
response_text = "<unable to read response>"
if log_http_errors:
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
raise
last_exception = e

View File

@@ -71,6 +71,7 @@ class HydrusNetwork:
url: str
access_key: str = ""
timeout: float = 60.0
instance_name: str = "" # Optional store name (e.g., 'home') for namespaced logs
scheme: str = field(init=False)
hostname: str = field(init=False)
@@ -90,6 +91,12 @@ class HydrusNetwork:
self.port = parsed.port or (443 if self.scheme == "https" else 80)
self.base_path = parsed.path.rstrip("/")
self.access_key = self.access_key or ""
self.instance_name = str(self.instance_name or "").strip()
def _log_prefix(self) -> str:
if self.instance_name:
return f"[hydrusnetwork:{self.instance_name}]"
return f"[hydrusnetwork:{self.hostname}:{self.port}]"
# ------------------------------------------------------------------
# low-level helpers
@@ -120,7 +127,7 @@ class HydrusNetwork:
url = f"{self.scheme}://{self.hostname}:{self.port}{path}"
# Log request details
logger.debug(f"[Hydrus] {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})")
logger.debug(f"{self._log_prefix()} {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})")
status = 0
reason = ""
@@ -135,14 +142,14 @@ class HydrusNetwork:
file_path = Path(spec.file_path)
if not file_path.is_file():
error_msg = f"Upload file not found: {file_path}"
logger.error(f"[Hydrus] {error_msg}")
logger.error(f"{self._log_prefix()} {error_msg}")
raise FileNotFoundError(error_msg)
file_size = file_path.stat().st_size
headers["Content-Type"] = spec.content_type or "application/octet-stream"
headers["Content-Length"] = str(file_size)
logger.debug(f"[Hydrus] Uploading file {file_path.name} ({file_size} bytes)")
logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")
def file_gen():
with file_path.open("rb") as handle:
@@ -153,7 +160,9 @@ class HydrusNetwork:
spec.method,
url,
content=file_gen(),
headers=headers
headers=headers,
raise_for_status=False,
log_http_errors=False,
)
else:
content = None
@@ -163,14 +172,16 @@ class HydrusNetwork:
content = spec.data
else:
json_data = spec.data
logger.debug(f"[Hydrus] Request body size: {len(content) if content else 'json'}")
logger.debug(f"{self._log_prefix()} Request body size: {len(content) if content else 'json'}")
response = client.request(
spec.method,
url,
content=content,
json=json_data,
headers=headers
headers=headers,
raise_for_status=False,
log_http_errors=False,
)
status = response.status_code
@@ -178,20 +189,14 @@ class HydrusNetwork:
body = response.content
content_type = response.headers.get("Content-Type", "") or ""
logger.debug(f"[Hydrus] Response {status} {reason} ({len(body)} bytes)")
logger.debug(f"{self._log_prefix()} Response {status} {reason} ({len(body)} bytes)")
except (httpx.ConnectError, httpx.TimeoutException, httpx.NetworkError) as exc:
msg = f"Hydrus unavailable: {exc}"
logger.warning(f"[Hydrus] {msg}")
logger.warning(f"{self._log_prefix()} {msg}")
raise HydrusConnectionError(msg) from exc
except httpx.HTTPStatusError as exc:
response = exc.response
status = response.status_code
reason = response.reason_phrase
body = response.content
content_type = response.headers.get("Content-Type", "") or ""
except Exception as exc:
logger.error(f"[Hydrus] Connection error: {exc}", exc_info=True)
logger.error(f"{self._log_prefix()} Connection error: {exc}", exc_info=True)
raise
payload: Any
@@ -220,18 +225,22 @@ class HydrusNetwork:
else:
message = reason or "HTTP error"
logger.error(f"[Hydrus] HTTP {status}: {message}")
# Some endpoints are naturally "missing" sometimes and should not spam logs.
if status == 404 and spec.endpoint.rstrip("/") == "/get_files/file_path":
return {}
logger.error(f"{self._log_prefix()} HTTP {status}: {message}")
# Handle expired session key (419) by clearing cache and retrying once
if status == 419 and self._session_key and "session" in message.lower():
logger.warning(f"[Hydrus] Session key expired, acquiring new one and retrying...")
logger.warning(f"{self._log_prefix()} Session key expired, acquiring new one and retrying...")
self._session_key = "" # Clear expired session key
try:
self._acquire_session_key()
# Retry the request with new session key
return self._perform_request(spec)
except Exception as retry_error:
logger.error(f"[Hydrus] Retry failed: {retry_error}", exc_info=True)
logger.error(f"{self._log_prefix()} Retry failed: {retry_error}", exc_info=True)
# If retry fails, raise the original error
raise HydrusRequestError(status, message, payload) from retry_error
@@ -316,6 +325,16 @@ class HydrusNetwork:
def add_file(self, file_path: Path) -> dict[str, Any]:
return self._post("/add_files/add_file", file_path=file_path)
def undelete_files(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]:
"""Restore files from Hydrus trash back into 'my files'.
Hydrus Client API: POST /add_files/undelete_files
Required JSON args: {"hashes": [<sha256 hex>, ...]}
"""
hash_list = self._ensure_hashes(hashes)
body = {"hashes": hash_list}
return self._post("/add_files/undelete_files", data=body)
def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]:
hash = self._ensure_hashes(hash)
body = {"hashes": hash, "service_names_to_tags": {service_name: list(tags)}}

39
CLI.py
View File

@@ -68,7 +68,7 @@ from typing import Callable
from config import get_local_storage_path, load_config
from cmdlet.catalog import (
from cmdlet_catalog import (
import_cmd_module as _catalog_import_cmd_module,
list_cmdlet_metadata as _catalog_list_cmdlet_metadata,
list_cmdlet_names as _catalog_list_cmdlet_names,
@@ -305,8 +305,6 @@ def _get_table_title_for_command(
'add_file': 'Results',
'delete-file': 'Results',
'delete_file': 'Results',
'check-file-status': 'Status',
'check_file_status': 'Status',
'get-metadata': None,
'get_metadata': None,
}
@@ -843,10 +841,6 @@ def _create_cmdlet_cli():
# Load config
config = _load_cli_config()
# Initialize cookies check for yt-dlp
from hydrus_health_check import initialize_cookies_check
initialize_cookies_check(config, emit_debug=False)
# Initialize debug logging if enabled
if config:
from SYS.logger import set_debug
@@ -991,8 +985,6 @@ def _create_cmdlet_cli():
# Run startup checks and render table
try:
from hydrus_health_check import initialize_cookies_check
# MPV availability is validated by MPV.MPV.__init__.
try:
from MPV.mpv_ipc import MPV
@@ -1294,8 +1286,13 @@ def _create_cmdlet_cli():
# Cookies are used by yt-dlp; keep this centralized utility.
try:
ok, detail = initialize_cookies_check(config, emit_debug=False)
_add_startup_check("FOUND" if ok else "MISSING", "Cookies", "N/A", detail or "Not found")
from tool.ytdlp import YtDlpTool
cookiefile = YtDlpTool(config).resolve_cookiefile()
if cookiefile is not None:
_add_startup_check("FOUND", "Cookies", "N/A", str(cookiefile))
else:
_add_startup_check("MISSING", "Cookies", "N/A", "Not found")
except Exception as exc:
_add_startup_check("ERROR", "Cookies", "N/A", str(exc))
@@ -1580,10 +1577,11 @@ def _execute_pipeline(tokens: list):
hash_val = getattr(item, 'hash', getattr(item, 'hash_hex', 'N/A'))
title_val = getattr(item, 'title', 'N/A')
if hash_val != 'N/A':
hash_display = hash_val[:8] + '...' if len(str(hash_val)) > 8 else hash_val
print(f" -> hash={hash_display}, title={title_val}")
hash_display = str(hash_val)
title_display = str(title_val)
print(f" -> hash:{hash_display}, title:{title_display}")
else:
print(f" -> title={title_val}")
print(f" -> title:{title_val}")
else:
print(" -> [source_index out of range]")
if resolved_list is not None:
@@ -2143,14 +2141,14 @@ def _execute_pipeline(tokens: list):
display_only_commands = {
'get-note', 'get_note',
'get-relationship', 'get_relationship', 'get-file', 'get_file',
'check-file-status', 'check_file_status'
}
# Commands that manage their own table/history state (e.g. get-tag)
self_managing_commands = {
'get-tag', 'get_tag', 'tags',
'get-url', 'get_url',
'search-file', 'search_file',
'search-provider', 'search_provider'
'search-provider', 'search_provider',
'search-store', 'search_store'
}
overlay_table = ctx.get_display_table() if hasattr(ctx, 'get_display_table') else None
@@ -2382,7 +2380,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
# Ensure native commands (cmdnat) are loaded
try:
from cmdlet.catalog import ensure_registry_loaded as _ensure_registry_loaded
from cmdlet_catalog import ensure_registry_loaded as _ensure_registry_loaded
_ensure_registry_loaded()
except Exception:
pass
@@ -2391,7 +2389,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
cmd_fn = REGISTRY.get(cmd_name)
if not cmd_fn:
# Attempt lazy import of the module and retry
from cmdlet.catalog import import_cmd_module as _catalog_import
from cmdlet_catalog import import_cmd_module as _catalog_import
try:
mod = _catalog_import(cmd_name)
data = getattr(mod, "CMDLET", None) if mod else None
@@ -2537,13 +2535,13 @@ def _execute_cmdlet(cmd_name: str, args: list):
display_only_commands = {
'get-url', 'get_url', 'get-note', 'get_note',
'get-relationship', 'get_relationship', 'get-file', 'get_file',
'check-file-status', 'check_file_status'
}
# Commands that manage their own table/history state (e.g. get-tag)
self_managing_commands = {
'get-tag', 'get_tag', 'tags',
'search-file', 'search_file',
'search-provider', 'search_provider'
'search-provider', 'search_provider',
'search-store', 'search_store'
}
if cmd_name in self_managing_commands:
@@ -2596,7 +2594,6 @@ def _execute_cmdlet(cmd_name: str, args: list):
display_only_commands = {
'get-url', 'get_url', 'get-note', 'get_note',
'get-relationship', 'get_relationship', 'get-file', 'get_file',
'check-file-status', 'check_file_status'
}
self_managing_commands = {
'get-tag', 'get_tag', 'tags',

View File

@@ -15,11 +15,11 @@ from SYS.logger import log
from models import ProgressBar
# Optional dependencies
# Optional dependency for HTML scraping fallbacks
try:
from bs4 import BeautifulSoup
from lxml import html as lxml_html
except ImportError:
BeautifulSoup = None
lxml_html = None
class Libgen(SearchProvider):
@@ -116,7 +116,7 @@ class Libgen(SearchProvider):
return []
def validate(self) -> bool:
# JSON-based searching can work without BeautifulSoup; HTML parsing is a fallback.
# JSON-based searching can work without lxml; HTML parsing is a fallback.
return True
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
@@ -342,8 +342,8 @@ class LibgenSearch:
Uses a total time budget across mirrors to avoid long hangs.
"""
# Prefer JSON API (no BeautifulSoup needed); HTML scraping is a fallback.
has_bs4 = BeautifulSoup is not None
# Prefer JSON API (no lxml needed); HTML scraping is a fallback.
has_lxml = lxml_html is not None
started = time.monotonic()
@@ -372,7 +372,7 @@ class LibgenSearch:
results = []
if not results:
if not has_bs4:
if not has_lxml:
continue
if "libgen.li" in mirror or "libgen.gl" in mirror:
@@ -417,57 +417,73 @@ class LibgenSearch:
resp = self.session.get(url, params=params, timeout=timeout)
resp.raise_for_status()
if BeautifulSoup is None:
if lxml_html is None:
return []
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"class": "c"})
if not table:
tables = soup.find_all("table")
for t in tables:
if len(t.find_all("tr")) > 5:
def _text(el: Any) -> str:
return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
try:
doc = lxml_html.fromstring(resp.content)
except Exception:
return []
table_nodes = doc.xpath(
"//table[contains(concat(' ', normalize-space(@class), ' '), ' c ')]"
)
table = table_nodes[0] if table_nodes else None
if table is None:
for t in doc.xpath("//table"):
if len(t.xpath(".//tr")) > 5:
table = t
break
if not table:
if table is None:
return []
results: List[Dict[str, Any]] = []
rows = table.find_all("tr")[1:]
rows = table.xpath(".//tr")[1:]
for row in rows:
cols = row.find_all("td")
cols = row.xpath("./td")
if len(cols) < 9:
continue
try:
libgen_id = cols[0].get_text(strip=True)
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
if not authors:
authors = [cols[1].get_text(strip=True)]
libgen_id = _text(cols[0])
title_tag = cols[2].find("a")
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
author_links = cols[1].xpath(".//a")
authors = [_text(a) for a in author_links if _text(a)]
if not authors:
authors = [_text(cols[1])]
title_tag = None
title_links = cols[2].xpath(".//a")
if title_links:
title_tag = title_links[0]
title = _text(title_tag) if title_tag is not None else _text(cols[2])
md5 = ""
if title_tag and title_tag.has_attr("href"):
if title_tag is not None:
href = str(title_tag.get("href") or "")
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
if match:
md5 = match.group(1)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
pages = cols[5].get_text(strip=True)
language = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
publisher = _text(cols[3])
year = _text(cols[4])
pages = _text(cols[5])
language = _text(cols[6])
size = _text(cols[7])
extension = _text(cols[8])
mirror_links = []
mirror_links: List[str] = []
for i in range(9, len(cols)):
a = cols[i].find("a")
if a and a.has_attr("href"):
mirror_links.append(a["href"])
a_nodes = cols[i].xpath(".//a[@href]")
if a_nodes:
href = str(a_nodes[0].get("href") or "").strip()
if href:
mirror_links.append(href)
if md5:
download_link = f"http://library.lol/main/{md5}"
@@ -476,10 +492,11 @@ class LibgenSearch:
else:
download_link = ""
results.append({
results.append(
{
"id": libgen_id,
"title": title,
"author": ", ".join(authors),
"author": ", ".join([a for a in authors if a]) or "Unknown",
"publisher": publisher,
"year": year,
"pages": pages,
@@ -489,11 +506,11 @@ class LibgenSearch:
"md5": md5,
"mirror_url": download_link,
"cover": "",
})
}
)
if len(results) >= limit:
break
except Exception as e:
logging.debug(f"Error parsing row: {e}")
continue
@@ -521,21 +538,35 @@ class LibgenSearch:
resp = self.session.get(url, params=params, timeout=timeout)
resp.raise_for_status()
if BeautifulSoup is None:
if lxml_html is None:
return []
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "tablelibgen"})
if not table:
table = soup.find("table", {"class": "table table-striped"})
if not table:
def _text(el: Any) -> str:
return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
try:
doc = lxml_html.fromstring(resp.content)
except Exception:
return []
table_nodes = doc.xpath("//table[@id='tablelibgen']")
table = table_nodes[0] if table_nodes else None
if table is None:
# Common libgen.li/gl fallback
table_nodes = doc.xpath(
"//table[contains(concat(' ', normalize-space(@class), ' '), ' table ') and "
"contains(concat(' ', normalize-space(@class), ' '), ' table-striped ')]"
)
table = table_nodes[0] if table_nodes else None
if table is None:
return []
results: List[Dict[str, Any]] = []
rows = table.find_all("tr")[1:]
rows = table.xpath(".//tr")[1:]
for row in rows:
cols = row.find_all("td")
cols = row.xpath("./td")
if len(cols) < 9:
continue
@@ -543,26 +574,30 @@ class LibgenSearch:
# Extract md5 (libgen.gl exposes /ads.php?md5=... in mirror column)
md5 = ""
mirror_url = ""
for a in row.find_all("a"):
href = a.get("href")
for a in row.xpath(".//a[@href]"):
href = str(a.get("href") or "")
if not href:
continue
m = re.search(r"md5=([a-fA-F0-9]{32})", str(href))
m = re.search(r"md5=([a-fA-F0-9]{32})", href)
if m:
md5 = m.group(1)
if "ads.php" in str(href):
mirror_url = urljoin(mirror, str(href))
if "ads.php" in href:
mirror_url = urljoin(mirror, href)
break
if not mirror_url and md5:
mirror_url = urljoin(mirror, f"/ads.php?md5={md5}")
# Extract numeric file id from /file.php?id=...
libgen_id = ""
file_link = row.find("a", href=re.compile(r"/file\.php\?id=\d+"))
if file_link and file_link.get("href"):
m = re.search(r"id=(\d+)", str(file_link.get("href")))
for a in row.xpath(".//a[@href]"):
href = str(a.get("href") or "")
if not href:
continue
if re.search(r"/file\.php\?id=\d+", href):
m = re.search(r"id=(\d+)", href)
if m:
libgen_id = m.group(1)
break
title = ""
authors = ""
@@ -585,7 +620,7 @@ class LibgenSearch:
if offset is not None:
meta_cell = cols[offset]
meta_text = " ".join([str(s).strip() for s in meta_cell.stripped_strings if str(s).strip()])
meta_text = _text(meta_cell)
# Extract ISBNs from meta cell (avoid using them as title)
# Matches 10 or 13-digit ISBN with optional leading 978/979.
@@ -601,11 +636,11 @@ class LibgenSearch:
# Choose a "real" title from meta cell.
# libgen.gl meta can include series/edition/isbn blobs; prefer text with letters.
raw_candidates: List[str] = []
for a in meta_cell.find_all("a"):
t = a.get_text(" ", strip=True)
for a in meta_cell.xpath(".//a"):
t = _text(a)
if t:
raw_candidates.append(t)
for s in meta_cell.stripped_strings:
for s in meta_cell.itertext():
t = str(s).strip()
if t:
raw_candidates.append(t)
@@ -645,27 +680,27 @@ class LibgenSearch:
best_score = score
best_title = cand
title = best_title or meta_cell.get_text(" ", strip=True)
title = best_title or _text(meta_cell)
authors = cols[offset + 1].get_text(" ", strip=True)
publisher = cols[offset + 2].get_text(" ", strip=True)
year = cols[offset + 3].get_text(" ", strip=True)
language = cols[offset + 4].get_text(" ", strip=True)
pages = cols[offset + 5].get_text(" ", strip=True)
size = cols[offset + 6].get_text(" ", strip=True)
extension = cols[offset + 7].get_text(" ", strip=True)
authors = _text(cols[offset + 1])
publisher = _text(cols[offset + 2])
year = _text(cols[offset + 3])
language = _text(cols[offset + 4])
pages = _text(cols[offset + 5])
size = _text(cols[offset + 6])
extension = _text(cols[offset + 7])
else:
# Older fallback structure
title_col = cols[1]
title_link = title_col.find("a")
title = title_link.get_text(" ", strip=True) if title_link else title_col.get_text(" ", strip=True)
authors = cols[2].get_text(" ", strip=True)
publisher = cols[3].get_text(" ", strip=True)
year = cols[4].get_text(" ", strip=True)
language = cols[5].get_text(" ", strip=True)
pages = cols[6].get_text(" ", strip=True)
size = cols[7].get_text(" ", strip=True)
extension = cols[8].get_text(" ", strip=True)
title_links = title_col.xpath(".//a")
title = _text(title_links[0]) if title_links else _text(title_col)
authors = _text(cols[2])
publisher = _text(cols[3])
year = _text(cols[4])
language = _text(cols[5])
pages = _text(cols[6])
size = _text(cols[7])
extension = _text(cols[8])
title = (title or "").strip() or "Unknown"
authors = (authors or "").strip() or "Unknown"
@@ -729,15 +764,49 @@ def _resolve_download_url(
current_url = url
visited = set()
if BeautifulSoup is None:
_call(log_info, "[resolve] BeautifulSoup not available; cannot resolve HTML download chain")
def _resolve_html_links_regex(base_url: str, html: str) -> Optional[str]:
"""Best-effort HTML link resolver without lxml.
This is intentionally minimal: it primarily targets LibGen landing pages like
`/ads.php?md5=...` which contain a `get.php?md5=...` link.
"""
if not html:
return None
def _find_a_by_text(pattern: str) -> Optional[Any]:
for a in soup.find_all("a"):
t = a.get_text(" ", strip=True)
# Prefer explicit get.php md5 links (most common successful chain).
m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Next: library.lol main links.
m = re.search(r'href=["\']([^"\']*library\.lol[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Finally: any direct file extension link.
m = re.search(
r'href=["\']([^"\']+\.(?:pdf|epub|mobi|djvu|azw3|cbz|cbr)(?:\?[^"\']*)?)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
return None
def _find_href_by_text(doc: Any, pattern: str) -> Optional[str]:
for a in doc.xpath("//a[@href]"):
t = " ".join([s.strip() for s in a.itertext() if s and str(s).strip()]).strip()
if t and re.search(pattern, t, re.IGNORECASE):
return a
href = str(a.get("href") or "").strip()
if href and not href.lower().startswith("javascript:"):
return href
return None
for _ in range(6):
@@ -763,42 +832,58 @@ def _resolve_download_url(
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
return None
soup = BeautifulSoup(content, "html.parser")
doc = None
if lxml_html is not None:
try:
doc = lxml_html.fromstring(content)
except Exception:
doc = None
get_link = _find_a_by_text(r"^GET$")
if get_link and get_link.has_attr("href"):
return urljoin(current_url, str(get_link.get("href") or ""))
if doc is None:
next_url = _resolve_html_links_regex(current_url, content)
if next_url:
current_url = next_url
continue
_call(log_info, "[resolve] lxml not available and regex resolver found no links")
return None
get_href = _find_href_by_text(doc, r"^GET$")
if get_href:
return urljoin(current_url, get_href)
if "series.php" in current_url:
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
if edition_link:
current_url = urljoin(current_url, str(edition_link.get("href") or ""))
hrefs = doc.xpath("//a[contains(@href,'edition.php')]/@href")
if hrefs:
current_url = urljoin(current_url, str(hrefs[0] or ""))
continue
if "edition.php" in current_url:
file_link = soup.find("a", href=re.compile(r"file\.php"))
if file_link:
current_url = urljoin(current_url, str(file_link.get("href") or ""))
hrefs = doc.xpath("//a[contains(@href,'file.php')]/@href")
if hrefs:
current_url = urljoin(current_url, str(hrefs[0] or ""))
continue
if "file.php" in current_url:
libgen_link = soup.find("a", title="libgen")
if not libgen_link:
libgen_link = _find_a_by_text(r"Libgen")
if libgen_link and libgen_link.has_attr("href"):
current_url = urljoin(current_url, str(libgen_link.get("href") or ""))
libgen_href = None
for a in doc.xpath("//a[@href]"):
if str(a.get("title") or "").strip().lower() == "libgen":
libgen_href = str(a.get("href") or "").strip()
break
if not libgen_href:
libgen_href = _find_href_by_text(doc, r"Libgen")
if libgen_href:
current_url = urljoin(current_url, libgen_href)
continue
if "ads.php" in current_url:
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
if get_php_link:
return urljoin(current_url, str(get_php_link.get("href") or ""))
hrefs = doc.xpath("//a[contains(@href,'get.php')]/@href")
if hrefs:
return urljoin(current_url, str(hrefs[0] or ""))
for text in ["Cloudflare", "IPFS.io", "Infura"]:
link = _find_a_by_text(re.escape(text))
if link and link.has_attr("href"):
return urljoin(current_url, str(link.get("href") or ""))
href = _find_href_by_text(doc, re.escape(text))
if href:
return urljoin(current_url, href)
break

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import base64
import io
from concurrent import futures
import hashlib
import json as json_module
@@ -34,6 +35,53 @@ except ImportError:
tqdm = None # type: ignore
def _image_paths_to_pdf_bytes(images: List[str]) -> Optional[bytes]:
if not images:
return None
try:
from PIL import Image # type: ignore
except Exception:
return None
pil_images: List[Any] = []
try:
for p in images:
img_path = Path(p)
if not img_path.is_file():
continue
with Image.open(img_path) as im: # type: ignore[attr-defined]
# Ensure PDF-compatible mode.
if im.mode in {"RGBA", "LA", "P"}:
im = im.convert("RGB")
else:
im = im.convert("RGB")
pil_images.append(im.copy())
except Exception:
for im in pil_images:
try:
im.close()
except Exception:
pass
return None
if not pil_images:
return None
buf = io.BytesIO()
first, rest = pil_images[0], pil_images[1:]
try:
first.save(buf, format="PDF", save_all=True, append_images=rest)
return buf.getvalue()
except Exception:
return None
finally:
for im in pil_images:
try:
im.close()
except Exception:
pass
def _looks_like_isbn(text: str) -> bool:
t = (text or "").replace("-", "").strip()
return t.isdigit() and len(t) in (10, 13)
@@ -941,17 +989,11 @@ class OpenLibrary(SearchProvider):
try:
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
try:
import img2pdf # type: ignore
pdf_bytes = img2pdf.convert(images) if images else None
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
log("[openlibrary] PDF conversion failed", file=sys.stderr)
try:
shutil.rmtree(temp_dir)
except Exception:
pass
return None
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
return Path(temp_dir)
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
@@ -963,10 +1005,6 @@ class OpenLibrary(SearchProvider):
pass
return pdf_path
except ImportError:
# Keep images folder.
return Path(temp_dir)
except Exception:
try:
shutil.rmtree(temp_dir)

View File

@@ -281,13 +281,6 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
# Check global cookies file lazily to avoid import cycles
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
base_options["cookiefile"] = global_cookies
else:
# Fallback to browser cookies
base_options["cookiesfrombrowser"] = ("chrome",)
@@ -453,21 +446,40 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
# Try to find actual download link in the page
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
try:
from lxml import html as lxml_html
except ImportError:
lxml_html = None
# Look for download links - LibGen typically has forms with download buttons
# Look for all links and forms that might lead to download
for link in soup.find_all('a'):
href = link.get('href')
if href and isinstance(href, str):
# Look for direct file links or get.php redirects
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
download_url = href if href.startswith('http') else urljoin(final_url, href)
if lxml_html is not None:
doc = lxml_html.fromstring(response.content)
for a in doc.xpath("//a[@href]"):
href = str(a.get("href") or "").strip()
if not href:
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except ImportError:
pass # BeautifulSoup not available
else:
# Regex fallback
for m in re.finditer(
r"href=[\"\']([^\"\']+)[\"\']",
response.text or "",
flags=re.IGNORECASE,
):
href = str(m.group(1) or "").strip()
if not href or href.lower().startswith("javascript:"):
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except Exception:
pass
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
@@ -708,12 +720,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"noprogress": True, # No progress bars
}
# Add cookies if available (lazy import to avoid circular dependency)
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
ydl_opts["cookiefile"] = global_cookies
# Cookies are optional for probing; callers should pass cookiefile via DownloadOptions when needed.
# Add no_playlist option if specified
if no_playlist:

View File

@@ -23,6 +23,10 @@ class HydrusNetwork(Store):
Maintains its own HydrusClient.
"""
def _log_prefix(self) -> str:
store_name = getattr(self, "NAME", None) or "unknown"
return f"[hydrusnetwork:{store_name}]"
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
instance = super().__new__(cls)
name = kwargs.get("NAME")
@@ -109,7 +113,7 @@ class HydrusNetwork(Store):
raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc
# Create a persistent client for this instance (auth via access key by default).
self._client = HydrusClient(url=self.URL, access_key=self.API)
self._client = HydrusClient(url=self.URL, access_key=self.API, instance_name=self.NAME)
# Best-effort total count (fast on Hydrus side; does not fetch IDs/hashes).
try:
@@ -129,7 +133,7 @@ class HydrusNetwork(Store):
if isinstance(count_val, int):
self.total_count = count_val
except Exception as exc:
debug(f"Hydrus total count unavailable for '{self.NAME}': {exc}", file=sys.stderr)
debug(f"{self._log_prefix()} total count unavailable: {exc}", file=sys.stderr)
def name(self) -> str:
return self.NAME
@@ -167,7 +171,7 @@ class HydrusNetwork(Store):
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}")
debug(f"{self._log_prefix()} file hash: {file_hash}")
# Use persistent client with session key
client = self._client
@@ -177,11 +181,24 @@ class HydrusNetwork(Store):
# Check if file already exists in Hydrus
file_exists = False
try:
metadata = client.fetch_file_metadata(hashes=[file_hash])
metadata = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=False,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
)
if metadata and isinstance(metadata, dict):
files = metadata.get("metadata", [])
if files:
metas = metadata.get("metadata", [])
if isinstance(metas, list) and metas:
# Hydrus returns placeholder rows for unknown hashes.
# Only treat as a real duplicate if it has a concrete file_id.
for meta in metas:
if isinstance(meta, dict) and meta.get("file_id") is not None:
file_exists = True
break
if file_exists:
log(
f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
file=sys.stderr,
@@ -189,9 +206,17 @@ class HydrusNetwork(Store):
except Exception:
pass
# If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'.
# This keeps behavior aligned with user expectation: "use API only" and ensure it lands in my files.
if file_exists:
try:
client.undelete_files([file_hash])
except Exception:
pass
# Upload file if not already present
if not file_exists:
log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
log(f"{self._log_prefix()} Uploading: {file_path.name}", file=sys.stderr)
response = client.add_file(file_path)
# Extract hash from response
@@ -207,7 +232,7 @@ class HydrusNetwork(Store):
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"Hydrus: {file_hash}", file=sys.stderr)
log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)
# Add tags if provided (both for new and existing files)
if tag_list:
@@ -218,27 +243,27 @@ class HydrusNetwork(Store):
service_name = "my tags"
try:
debug(f"Adding {len(tag_list)} tag(s) to Hydrus: {tag_list}")
debug(f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}")
client.add_tag(file_hash, tag_list, service_name)
log(f"Tags added via '{service_name}'", file=sys.stderr)
log(f"{self._log_prefix()} Tags added via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)
log(f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}", file=sys.stderr)
# Associate url if provided (both for new and existing files)
if url:
log(f"Associating {len(url)} URL(s) with file", file=sys.stderr)
log(f"{self._log_prefix()} Associating {len(url)} URL(s) with file", file=sys.stderr)
for url in url:
if url:
try:
client.associate_url(file_hash, str(url))
debug(f"Associated URL: {url}")
debug(f"{self._log_prefix()} Associated URL: {url}")
except Exception as exc:
log(f"⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr)
log(f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr)
return file_hash
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
log(f"{self._log_prefix()} upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
@@ -262,7 +287,8 @@ class HydrusNetwork(Store):
if client is None:
raise Exception("Hydrus client unavailable")
debug(f"Searching Hydrus for: {query}")
prefix = self._log_prefix()
debug(f"{prefix} Searching for: {query}")
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
@@ -446,7 +472,7 @@ class HydrusNetwork(Store):
tags = [query_lower]
if not tags:
debug(f"Found 0 result(s)")
debug(f"{prefix} 0 result(s)")
return []
# Search files with the tags (unless url: search already produced metadata)
@@ -465,7 +491,7 @@ class HydrusNetwork(Store):
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
debug(f"{prefix} 0 result(s)")
return []
if file_ids:
@@ -595,7 +621,7 @@ class HydrusNetwork(Store):
"ext": ext,
})
debug(f"Found {len(results)} result(s)")
debug(f"{prefix} {len(results)} result(s)")
return results[:limit]
except Exception as exc:
@@ -611,13 +637,13 @@ class HydrusNetwork(Store):
Only explicit user actions (e.g. the get-file cmdlet) should open files.
"""
debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...")
debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")
# Build browser URL with access key
base_url = str(self.URL).rstrip('/')
access_key = str(self.API)
browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}")
debug(f"{self._log_prefix()} get_file: url={browser_url}")
return browser_url
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
@@ -632,17 +658,28 @@ class HydrusNetwork(Store):
try:
client = self._client
if not client:
debug("get_metadata: Hydrus client unavailable")
debug(f"{self._log_prefix()} get_metadata: client unavailable")
return None
# Fetch file metadata
payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True)
# Fetch file metadata with the fields we need for CLI display.
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_mime=True,
)
if not payload or not payload.get("metadata"):
return None
meta = payload["metadata"][0]
# Hydrus can return placeholder metadata rows for unknown hashes.
if not isinstance(meta, dict) or meta.get("file_id") is None:
return None
# Extract title from tags
title = f"Hydrus_{file_hash[:12]}"
tags_payload = meta.get("tags", {})
@@ -660,33 +697,109 @@ class HydrusNetwork(Store):
if title != f"Hydrus_{file_hash[:12]}":
break
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map if needed.
mime_type = meta.get("mime", "")
ext_raw = meta.get("ext")
ext = str(ext_raw or "").strip().lstrip(".")
if not ext and mime_type:
# Hydrus may return mime as an int enum, or sometimes a human label.
mime_val = meta.get("mime")
filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")
# Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
# then title suffix, then file path suffix.
ext = str(meta.get("ext") or "").strip().lstrip(".")
if not ext:
ft = str(filetype_human or "").strip().lstrip(".").lower()
if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8:
# Treat simple labels like "mp4", "m4a", "webm" as extensions.
ext = ft
if not ext and isinstance(title, str) and "." in title:
try:
from SYS.utils_constant import mime_maps
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip(".")
break
if ext:
break
ext = Path(title).suffix.lstrip(".")
except Exception:
ext = ""
if not ext:
try:
path_payload = client.get_file_path(file_hash)
if isinstance(path_payload, dict):
p = path_payload.get("path")
if isinstance(p, str) and p.strip():
ext = Path(p.strip()).suffix.lstrip(".")
except Exception:
ext = ""
# If extension is still unknown, attempt a best-effort lookup from MIME.
def _mime_from_ext(ext_value: str) -> str:
    """Best-effort reverse lookup of a MIME type for *ext_value* via mime_maps.

    Returns the first MIME string registered for the (normalized, dot-less,
    lowercase) extension, or "" when the extension is empty or unknown.
    """
    normalized = str(ext_value or "").strip().lstrip(".").lower()
    if not normalized:
        return ""
    try:
        for category in mime_maps.values():
            entry = category.get(normalized)
            if not isinstance(entry, dict):
                continue
            mime_list = entry.get("mimes")
            if isinstance(mime_list, list) and mime_list:
                return str(mime_list[0])
    except Exception:
        # Malformed mime_maps structure: treat as "no match".
        pass
    return ""
# Normalize to a MIME string for CLI output.
# Avoid passing through human labels like "unknown filetype".
mime_type = ""
if isinstance(mime_val, str):
candidate = mime_val.strip()
if "/" in candidate and candidate.lower() != "unknown filetype":
mime_type = candidate
if not mime_type and isinstance(filetype_human, str):
candidate = filetype_human.strip()
if "/" in candidate and candidate.lower() != "unknown filetype":
mime_type = candidate
if not mime_type:
mime_type = _mime_from_ext(ext)
# Normalize size/duration to stable scalar types.
size_val = meta.get("size")
if size_val is None:
size_val = meta.get("size_bytes")
try:
size_int: int | None = int(size_val) if size_val is not None else None
except Exception:
size_int = None
dur_val = meta.get("duration")
if dur_val is None:
dur_val = meta.get("duration_ms")
try:
dur_int: int | None = int(dur_val) if dur_val is not None else None
except Exception:
dur_int = None
raw_urls = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
url_list: list[str] = []
if isinstance(raw_urls, str):
s = raw_urls.strip()
url_list = [s] if s else []
elif isinstance(raw_urls, list):
url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]
return {
"hash": file_hash,
"title": title,
"ext": ext,
"size": meta.get("size"),
"size": size_int,
"mime": mime_type,
# Keep raw fields available for troubleshooting/other callers.
"hydrus_mime": mime_val,
"filetype_human": filetype_human,
"duration_ms": dur_int,
"url": url_list,
}
except Exception as exc:
debug(f"Failed to get metadata from Hydrus: {exc}")
debug(f"{self._log_prefix()} get_metadata failed: {exc}")
return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
@@ -705,13 +818,13 @@ class HydrusNetwork(Store):
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"get_tags: invalid file hash '{file_identifier}'")
debug(f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'")
return [], "unknown"
# Get Hydrus client and service info
client = self._client
if not client:
debug("get_tags: Hydrus client unavailable")
debug(f"{self._log_prefix()} get_tags: client unavailable")
return [], "unknown"
# Fetch file metadata
@@ -723,12 +836,12 @@ class HydrusNetwork(Store):
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
debug(f"get_tags: No metadata returned for hash {file_hash}")
debug(f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}")
return [], "unknown"
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict) or meta.get("file_id") is None:
debug(f"get_tags: Invalid metadata for hash {file_hash}")
debug(f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}")
return [], "unknown"
# Extract tags using service name
@@ -741,7 +854,7 @@ class HydrusNetwork(Store):
return tags, "hydrus"
except Exception as exc:
debug(f"get_tags failed for Hydrus file: {exc}")
debug(f"{self._log_prefix()} get_tags failed: {exc}")
return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
@@ -750,12 +863,12 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("add_tag: Hydrus client unavailable")
debug(f"{self._log_prefix()} add_tag: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"add_tag: invalid file hash '{file_identifier}'")
debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
# Ensure tags is a list
@@ -765,7 +878,7 @@ class HydrusNetwork(Store):
client.add_tag(file_hash, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus add_tag failed: {exc}")
debug(f"{self._log_prefix()} add_tag failed: {exc}")
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
@@ -774,12 +887,12 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("delete_tag: Hydrus client unavailable")
debug(f"{self._log_prefix()} delete_tag: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"delete_tag: invalid file hash '{file_identifier}'")
debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
@@ -788,7 +901,7 @@ class HydrusNetwork(Store):
client.delete_tag(file_hash, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus delete_tag failed: {exc}")
debug(f"{self._log_prefix()} delete_tag failed: {exc}")
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
@@ -797,7 +910,7 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("get_url: Hydrus client unavailable")
debug(f"{self._log_prefix()} get_url: client unavailable")
return []
file_hash = str(file_identifier or "").strip().lower()
@@ -830,7 +943,7 @@ class HydrusNetwork(Store):
return out
return []
except Exception as exc:
debug(f"Hydrus get_url failed: {exc}")
debug(f"{self._log_prefix()} get_url failed: {exc}")
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
@@ -839,13 +952,13 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("add_url: Hydrus client unavailable")
debug(f"{self._log_prefix()} add_url: client unavailable")
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
debug(f"Hydrus add_url failed: {exc}")
debug(f"{self._log_prefix()} add_url failed: {exc}")
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
@@ -854,13 +967,13 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("delete_url: Hydrus client unavailable")
debug(f"{self._log_prefix()} delete_url: client unavailable")
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
debug(f"Hydrus delete_url failed: {exc}")
debug(f"{self._log_prefix()} delete_url failed: {exc}")
return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
@@ -868,7 +981,7 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("get_note: Hydrus client unavailable")
debug(f"{self._log_prefix()} get_note: client unavailable")
return {}
file_hash = str(file_identifier or "").strip().lower()
@@ -889,7 +1002,7 @@ class HydrusNetwork(Store):
return {}
except Exception as exc:
debug(f"Hydrus get_note failed: {exc}")
debug(f"{self._log_prefix()} get_note failed: {exc}")
return {}
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
@@ -897,7 +1010,7 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("set_note: Hydrus client unavailable")
debug(f"{self._log_prefix()} set_note: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
@@ -912,7 +1025,7 @@ class HydrusNetwork(Store):
client.set_notes(file_hash, {note_name: note_text})
return True
except Exception as exc:
debug(f"Hydrus set_note failed: {exc}")
debug(f"{self._log_prefix()} set_note failed: {exc}")
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
@@ -920,7 +1033,7 @@ class HydrusNetwork(Store):
try:
client = self._client
if client is None:
debug("delete_note: Hydrus client unavailable")
debug(f"{self._log_prefix()} delete_note: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
@@ -934,7 +1047,7 @@ class HydrusNetwork(Store):
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
debug(f"Hydrus delete_note failed: {exc}")
debug(f"{self._log_prefix()} delete_note failed: {exc}")
return False
@staticmethod

View File

@@ -6,6 +6,7 @@ import sys
import shutil
import tempfile
import re
from urllib.parse import urlsplit, parse_qs
import models
import pipeline as ctx
@@ -13,12 +14,20 @@ from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
extract_tag_from_result, extract_title_from_result, extract_url_from_result,
merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
)
from ._shared import collapse_namespace_tag
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata
@@ -181,7 +190,7 @@ class Add_File(Cmdlet):
downloaded_path = Path(downloaded)
if downloaded_path.exists() and downloaded_path.is_dir():
log(
"[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
"[add-file] OpenLibrary download produced a directory (PDF conversion failed). Cannot ingest.",
file=sys.stderr,
)
failures += 1
@@ -192,6 +201,26 @@ class Add_File(Cmdlet):
delete_after_item = True
# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
# Hydrus file URLs are direct file downloads and may require Hydrus auth headers.
# If the user provided a destination (-provider or -store), download now and continue.
if (provider_name or location) and isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://")):
downloaded = self._try_download_hydrus_file_url(
file_url=str(media_path_or_url),
pipe_obj=pipe_obj,
config=config,
)
if downloaded is not None:
downloaded_path, downloaded_temp_dir = downloaded
temp_dir_to_cleanup = downloaded_temp_dir
media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
pipe_obj.is_temp = True
delete_after_item = True
# If it's still a URL target, fall back to the legacy delegate.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
@@ -767,6 +796,134 @@ class Add_File(Cmdlet):
return True
return False
@staticmethod
def _sanitize_filename(value: str) -> str:
# Minimal Windows-safe filename sanitization.
text = str(value or "").strip()
if not text:
return "file"
invalid = '<>:"/\\|?*'
text = "".join("_" if (ch in invalid or ord(ch) < 32) else ch for ch in text)
text = re.sub(r"\s+", " ", text).strip(" .")
return text or "file"
@staticmethod
def _parse_hydrus_file_url(file_url: str) -> Optional[str]:
"""Return the sha256 hash from a Hydrus /get_files/file URL, or None."""
try:
split = urlsplit(str(file_url))
if split.scheme.lower() not in {"http", "https"}:
return None
path_lower = (split.path or "").lower()
if "/get_files/file" not in path_lower:
return None
params = parse_qs(split.query or "")
raw = None
if "hash" in params and params["hash"]:
raw = params["hash"][0]
if not raw:
return None
hash_val = str(raw).strip().lower()
if not re.fullmatch(r"[0-9a-f]{64}", hash_val):
return None
return hash_val
except Exception:
return None
def _try_download_hydrus_file_url(
    self,
    *,
    file_url: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> Optional[tuple[Path, Path]]:
    """If *file_url* is a Hydrus /get_files/file URL, download it to a temp dir.

    Returns ``(downloaded_path, temp_dir)`` on success, or ``None`` when the
    URL is not a Hydrus file URL, no matching Hydrus backend with an API key
    can be resolved, or the download fails.  The created temp directory is
    removed (best-effort) on every failure path after it is created.
    """
    file_hash = self._parse_hydrus_file_url(file_url)
    if not file_hash:
        return None
    # Resolve the Hydrus backend whose access key can authenticate this URL.
    # First preference: the store named on the piped item; strip any
    # "prefix:" namespace (e.g. "hydrusnetwork:home" -> "home").
    store_name = str(getattr(pipe_obj, "store", "") or "").strip()
    if ":" in store_name:
        store_name = store_name.split(":", 1)[-1].strip()
    backend = None
    try:
        store_registry = Store(config)
        if store_name and store_registry.is_available(store_name):
            candidate = store_registry[store_name]
            # Match by class name to avoid importing the store class here.
            if type(candidate).__name__.lower() == "hydrusnetwork":
                backend = candidate
    except Exception:
        backend = None
    # Second preference: any configured HydrusNetwork backend whose base URL
    # is a prefix of the file URL.
    if backend is None:
        try:
            store_registry = Store(config)
            target_prefix = str(file_url).split("/get_files/file", 1)[0].rstrip("/")
            for backend_name in store_registry.list_backends():
                candidate = store_registry[backend_name]
                if type(candidate).__name__.lower() != "hydrusnetwork":
                    continue
                base_url = str(getattr(candidate, "URL", "") or "").rstrip("/")
                if base_url and (target_prefix.lower() == base_url.lower() or target_prefix.lower().startswith(base_url.lower())):
                    backend = candidate
                    break
        except Exception:
            backend = None
    if backend is None:
        debug("[add-file] Hydrus file URL detected but no Hydrus backend matched for auth")
        return None
    api_key = str(getattr(backend, "API", "") or "").strip()
    if not api_key:
        debug(f"[add-file] Hydrus backend '{getattr(backend, 'NAME', '') or store_name}' missing API key")
        return None
    # Best-effort filename: prefer the piped item's ext/title; otherwise a
    # hash-derived name with a generic ".bin" extension.
    ext = ""
    try:
        if isinstance(pipe_obj.extra, dict):
            ext = str(pipe_obj.extra.get("ext") or "").strip().lstrip(".")
    except Exception:
        ext = ""
    if not ext:
        ext = "bin"
    title_hint = str(getattr(pipe_obj, "title", "") or "").strip()
    base_name = self._sanitize_filename(title_hint) if title_hint else f"hydrus_{file_hash[:12]}"
    temp_dir = Path(tempfile.mkdtemp(prefix="medios_hydrus_"))
    destination = unique_path(temp_dir / f"{base_name}.{ext}")
    headers = {"Hydrus-Client-API-Access-Key": api_key}
    # Mirror the backend client's timeout when available; default to 60s.
    timeout = 60.0
    try:
        client = getattr(backend, "_client", None)
        timeout_val = getattr(client, "timeout", None)
        if timeout_val is not None:
            timeout = float(timeout_val)
    except Exception:
        timeout = 60.0
    try:
        log(
            f"[add-file] Downloading Hydrus file via API ({getattr(backend, 'NAME', '') or store_name})",
            file=sys.stderr,
        )
        downloaded_bytes = hydrus_wrapper.download_hydrus_file(str(file_url), headers, destination, timeout)
        if downloaded_bytes <= 0 and not destination.exists():
            # Bug fix: this empty-download path previously returned without
            # removing temp_dir, leaking one directory per failed download.
            # Clean up the same way the exception path below does.
            shutil.rmtree(temp_dir, ignore_errors=True)
            return None
        return destination, temp_dir
    except Exception as exc:
        log(f"[add-file] Hydrus download failed: {exc}", file=sys.stderr)
        try:
            shutil.rmtree(temp_dir, ignore_errors=True)
        except Exception:
            pass
        return None
def _delegate_to_download_data(
self,
result: Any,
@@ -883,6 +1040,61 @@ class Add_File(Cmdlet):
except Exception:
return None
@staticmethod
def _get_note_text(result: Any, pipe_obj: models.PipeObject, note_name: str) -> Optional[str]:
    """Extract a named note text from a piped item.

    Lookup order:
    - pipe_obj.extra["notes"][note_name]
    - pipe_obj.extra[note_name]
    - result["notes"][note_name] for dict results
    - result[note_name] for dict results

    Returns the stripped text, or None when no value is found.
    """

    def _coerce(raw: Any) -> Optional[str]:
        # Normalize bytes/str/other values to a stripped, non-empty string.
        if raw is None:
            return None
        if isinstance(raw, bytes):
            try:
                raw = raw.decode("utf-8", errors="ignore")
            except Exception:
                raw = str(raw)
        try:
            text = raw.strip() if isinstance(raw, str) else str(raw).strip()
        except Exception:
            return None
        return text if text else None

    key = str(note_name or "").strip()
    if not key:
        return None

    # Candidate containers in priority order: PipeObject.extra first
    # (common for cmdlet-emitted dicts), then the raw result dict.
    containers = []
    try:
        if isinstance(pipe_obj.extra, dict):
            containers.append(pipe_obj.extra)
    except Exception:
        pass
    if isinstance(result, dict):
        containers.append(result)

    for container in containers:
        try:
            notes = container.get("notes")
            if isinstance(notes, dict) and key in notes:
                return _coerce(notes.get(key))
            if key in container:
                return _coerce(container.get(key))
        except Exception:
            continue
    return None
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
@@ -1451,6 +1663,26 @@ class Add_File(Cmdlet):
except Exception:
pass
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
sub_note = Add_File._get_note_text(result, pipe_obj, "sub")
if sub_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "sub", sub_note)
except Exception:
pass
chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
if chapters_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "chapters", chapters_note)
except Exception:
pass
meta: Dict[str, Any] = {}
try:
meta = backend.get_metadata(resolved_hash) or {}

View File

@@ -7,15 +7,15 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file
@@ -84,9 +84,9 @@ class Add_Note(Cmdlet):
else:
note_text = str(text_parts or "").strip()
if not note_text:
log("[add_note] Error: Empty note text", file=sys.stderr)
return 1
# Note text can be omitted when upstream stages provide it (e.g. download-media --write-sub
# attaches notes.sub). In that case we resolve per-item below.
user_provided_text = bool(note_text)
results = normalize_result_input(result)
if not results:
@@ -99,11 +99,56 @@ class Add_Note(Cmdlet):
store_registry = Store(config)
updated = 0
# Optional global fallback for note text from pipeline values.
# Allows patterns like: ... | add-note sub
pipeline_default_text = None
if not user_provided_text:
try:
pipeline_default_text = ctx.load_value(note_name)
except Exception:
pipeline_default_text = None
if isinstance(pipeline_default_text, list):
pipeline_default_text = " ".join([str(x) for x in pipeline_default_text]).strip()
elif pipeline_default_text is not None:
pipeline_default_text = str(pipeline_default_text).strip()
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
# Resolve note text for this item when not provided explicitly.
item_note_text = note_text
if not user_provided_text:
# Prefer item-scoped notes dict.
candidate = None
try:
notes = res.get("notes")
if isinstance(notes, dict):
candidate = notes.get(note_name)
except Exception:
candidate = None
# Also allow direct field fallback: res["sub"], etc.
if candidate is None:
try:
candidate = res.get(note_name)
except Exception:
candidate = None
if candidate is None:
candidate = pipeline_default_text
if isinstance(candidate, list):
item_note_text = " ".join([str(x) for x in candidate]).strip()
else:
item_note_text = str(candidate or "").strip()
if not item_note_text:
log(f"[add_note] Warning: No note text found for '{note_name}'; skipping", file=sys.stderr)
ctx.emit(res)
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
@@ -130,7 +175,7 @@ class Add_Note(Cmdlet):
ok = False
try:
ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
ok = bool(backend.set_note(resolved_hash, note_name, item_note_text, config=config))
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
ok = False

View File

@@ -11,7 +11,15 @@ from SYS.logger import log
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
get_field = sh.get_field
from API.folder import read_sidecar, find_sidecar, API_folder_store
from Store import Store

View File

@@ -8,19 +8,20 @@ from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tag,
should_show_help,
get_field,
)
from . import _shared as sh
normalize_result_input = sh.normalize_result_input
filter_results_by_temp = sh.filter_results_by_temp
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
expand_tag_groups = sh.expand_tag_groups
parse_cmdlet_args = sh.parse_cmdlet_args
collapse_namespace_tag = sh.collapse_namespace_tag
should_show_help = sh.should_show_help
get_field = sh.get_field
from Store import Store
from SYS.utils import sha256_file

View File

@@ -8,19 +8,20 @@ from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tags,
should_show_help,
get_field,
)
from . import _shared as sh
normalize_result_input = sh.normalize_result_input
filter_results_by_temp = sh.filter_results_by_temp
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
expand_tag_groups = sh.expand_tag_groups
parse_cmdlet_args = sh.parse_cmdlet_args
collapse_namespace_tags = sh.collapse_namespace_tags
should_show_help = sh.should_show_help
get_field = sh.get_field
from Store import Store
from SYS.utils import sha256_file

View File

@@ -4,12 +4,12 @@ from typing import Any, Dict, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
from SYS.logger import log
from Store import Store
class Add_Url(Cmdlet):
class Add_Url(sh.Cmdlet):
"""Add URL associations to files via hash+store."""
def __init__(self) -> None:
@@ -18,9 +18,9 @@ class Add_Url(Cmdlet):
summary="Associate a URL with a file",
usage="@1 | add-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to associate"),
sh.SharedArgs.HASH,
sh.SharedArgs.STORE,
sh.CmdletArg("url", required=True, description="URL to associate"),
],
detail=[
"- Associates URL with file identified by hash+store",
@@ -32,11 +32,11 @@ class Add_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
@@ -52,7 +52,7 @@ class Add_Url(Cmdlet):
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1

View File

@@ -1,190 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from SYS.logger import log
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
from Store import Store
# Module-level cmdlet metadata for the `check-file-status` command.
# NOTE(review): presumably discovered by a cmdlet registry/loader elsewhere
# via this `CMDLET` name — confirm against the loader convention.
CMDLET = Cmdlet(
    name="check-file-status",
    summary="Check if a file is active, deleted, or corrupted in Hydrus.",
    usage="check-file-status [-hash <sha256>] [-store <name>]",
    # Shared -hash / -store argument specs reused across cmdlets.
    arg=[
        SharedArgs.HASH,
        SharedArgs.STORE,
    ],
    # Help text lines shown in detailed usage output.
    detail=[
        "- Shows whether file is active in Hydrus or marked as deleted",
        "- Detects corrupted data (e.g., comma-separated url)",
        "- Displays file metadata and service locations",
        "- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
    ],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Report whether a file is active, trashed, or deleted in Hydrus.

    The target hash and store come from -hash/-store overrides or from the
    piped result. Metadata is fetched via the named store's Hydrus client
    (no fallback to the global client when a store is named), and a status
    report — services, URLs (flagging comma-corrupted entries), and tags —
    is printed to stderr.

    Returns:
        0 on success; 1 on any failure (no hash, no client, file missing).
    """
    # Help
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    # Parse arguments: accepts -hash/--hash/hash and -store/--store/store forms.
    override_hash: str | None = None
    override_store: str | None = None
    i = 0
    while i < len(args):
        token = args[i]
        low = str(token).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        if low in {"-store", "--store", "store"} and i + 1 < len(args):
            override_store = str(args[i + 1]).strip()
            i += 2
            continue
        i += 1

    # Resolve store: explicit override wins, then the piped result's store.
    store_name: str | None = override_store
    if not store_name:
        if isinstance(result, dict):
            store_name = str(result.get("store") or "").strip() or None
        else:
            store_name = str(getattr(result, "store", "") or "").strip() or None

    # Resolve hash the same way.
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        if isinstance(result, dict):
            hash_hex = normalize_hash(result.get("hash") or result.get("hash_hex"))
        else:
            hash_hex = normalize_hash(getattr(result, "hash", None) or getattr(result, "hash_hex", None))
    if not hash_hex:
        log("No hash provided and no result selected", file=sys.stderr)
        return 1

    # Acquire a Hydrus client.
    try:
        client = None
        if store_name:
            # Store specified: do not fall back to a global/default Hydrus client.
            try:
                store = Store(config)
                backend = store[str(store_name)]
                candidate = getattr(backend, "_client", None)
                if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
                    client = candidate
            except Exception:
                client = None
            if client is None:
                log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
                return 1
        else:
            client = hydrus_wrapper.get_client(config)
            if client is None:
                log("Hydrus client unavailable", file=sys.stderr)
                return 1
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1

    try:
        result_data = client.fetch_file_metadata(hashes=[hash_hex])
        if not result_data.get("metadata"):
            log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
            return 1
        file_info = result_data["metadata"][0]

        # Status summary (deleted takes precedence over trashed).
        is_deleted = file_info.get("is_deleted", False)
        is_trashed = file_info.get("is_trashed", False)
        status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
        log(f"File status: {status_str}", file=sys.stderr)

        # File info
        log(f"\n📄 File Information:", file=sys.stderr)
        log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
        log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
        log(f" MIME: {file_info['mime']}", file=sys.stderr)
        log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)

        # Service membership.
        file_services = file_info.get("file_services", {})
        current_services = file_services.get("current", {})
        deleted_services = file_services.get("deleted", {})
        if current_services:
            log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
            for service_key, service_info in current_services.items():
                sname = service_info.get("name", "unknown")
                stype = service_info.get("type_pretty", "unknown")
                log(f" - {sname} ({stype})", file=sys.stderr)
        if deleted_services:
            log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
            for service_key, service_info in deleted_services.items():
                sname = service_info.get("name", "unknown")
                stype = service_info.get("type_pretty", "unknown")
                time_deleted = service_info.get("time_deleted", "?")
                log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)

        # URL check. Fix: the original loop was `for i, url in enumerate(url, 1)`,
        # shadowing both the URL list and the arg-parse counter `i`.
        urls = file_info.get("url", [])
        log(f"\n🔗 url ({len(urls)}):", file=sys.stderr)
        corrupted_count = 0
        for idx, url in enumerate(urls, 1):
            if "," in url:
                corrupted_count += 1
                log(f" [{idx}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
            else:
                log(f" [{idx}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
        if corrupted_count > 0:
            log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)

        # Tags: count across all services first, then show up to 5 per service.
        tags_dict = file_info.get("tags", {})
        total_tags = 0
        for service_key, service_data in tags_dict.items():
            display_tags = service_data.get("display_tags", {}).get("0", [])
            total_tags += len(display_tags)
        if total_tags > 0:
            log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
            for service_key, service_data in tags_dict.items():
                display_tags = service_data.get("display_tags", {}).get("0", [])
                if display_tags:
                    service_name = service_data.get("name", "unknown")
                    log(f" {service_name}:", file=sys.stderr)
                    for tag in display_tags[:5]:  # Show first 5
                        log(f" - {tag}", file=sys.stderr)
                    if len(display_tags) > 5:
                        log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)

        log("\n", file=sys.stderr)
        return 0
    except Exception as exc:
        log(f"Error checking file status: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return 1
# Register cmdlet (no legacy decorator).
# Aliases allow invocation as check-status / file-status / status.
CMDLET.exec = _run
CMDLET.alias = ["check-status", "file-status", "status"]
CMDLET.register()

View File

@@ -1,105 +0,0 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
import json
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
import models
import pipeline as pipeline_context
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Remove temporary files from pipeline results.

    Accepts a single result object or a list of them. Results flagged
    is_temp=True are deleted from disk (together with their .tag and
    .metadata sidecars); the remaining permanent results are re-emitted
    for downstream pipeline stages.

    Returns:
        0 on success; 1 when there were no results to process.

    Typical pipeline usage:
        download-data url | screen-shot | add-tag -store local "tag" --all | cleanup
    """
    # Help
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0
    # Normalize input to list
    results = normalize_result_input(result)
    if not results:
        log("[cleanup] No results to process", file=sys.stderr)
        return 1
    # Separate temporary and permanent results.
    # NOTE(review): filter_results_by_temp is imported from ._shared at the top
    # of this file, but the pipeline_context variant is called here — confirm
    # both exist and behave identically, or drop the unused import.
    temp_results = pipeline_context.filter_results_by_temp(results, include_temp=True)
    perm_results = pipeline_context.filter_results_by_temp(results, include_temp=False)
    # Delete temporary files (best-effort: failures are logged, not fatal).
    deleted_count = 0
    for temp_result in temp_results:
        try:
            file_path = get_pipe_object_path(temp_result)
            if file_path:
                path_obj = Path(file_path)
                if path_obj.exists():
                    # Delete the file
                    path_obj.unlink()
                    log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
                    deleted_count += 1
                    # Clean up any associated sidecar files
                    for ext in ['.tag', '.metadata']:
                        sidecar = path_obj.parent / (path_obj.name + ext)
                        if sidecar.exists():
                            try:
                                sidecar.unlink()
                                log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
                            except Exception as e:
                                log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
                else:
                    log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
        except Exception as e:
            log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
    # Log summary
    log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
    # Emit permanent results for downstream processing
    for perm_result in perm_results:
        pipeline_context.emit(perm_result)
    return 0
# NOTE(review): CMDLET is bound to the return value of .register() — confirm
# register() returns the Cmdlet instance (fluent style); if it returns None,
# the help path in _run would fail on CMDLET.name.
CMDLET = Cmdlet(
    name="cleanup",
    summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
    usage="cleanup",
    arg=[],
    detail=[
        "- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
        "- Deletes files marked with is_temp=True from disk",
        "- Also cleans up associated sidecar files (.tag, .metadata)",
        "- Emits only non-temporary results for further processing",
        "- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
        "- Exit code 0 if cleanup successful, 1 if no results to process",
    ],
    exec=_run,
).register()

View File

@@ -8,12 +8,12 @@ from pathlib import Path
from SYS.logger import debug, log
from Store.Folder import Folder
from Store import Store
from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help
from . import _shared as sh
from API import HydrusNetwork as hydrus_wrapper
import pipeline as ctx
class Delete_File(Cmdlet):
class Delete_File(sh.Cmdlet):
"""Class-based delete-file cmdlet with self-registration."""
def __init__(self) -> None:
@@ -23,10 +23,10 @@ class Delete_File(Cmdlet):
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
alias=["del-file"],
arg=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
CmdletArg("reason", description="Optional reason for deletion (free text)."),
sh.CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
"Default removes both the local file and Hydrus file.",
@@ -45,24 +45,28 @@ class Delete_File(Cmdlet):
if isinstance(item, dict):
hash_hex_raw = item.get("hash_hex") or item.get("hash")
target = item.get("target") or item.get("file_path") or item.get("path")
title_val = item.get("title") or item.get("name")
else:
hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash")
target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path")
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
store = None
if isinstance(item, dict):
store = item.get("store")
else:
store = get_field(item, "store")
store = sh.get_field(item, "store")
store_lower = str(store).lower() if store else ""
is_hydrus_store = bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"})
store_label = str(store) if store else "default"
hydrus_prefix = f"[hydrusnetwork:{store_label}]"
# For Hydrus files, the target IS the hash
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
hash_hex = sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
@@ -156,19 +160,28 @@ class Delete_File(Cmdlet):
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('' if len(hash_hex) > 12 else '')
debug(f"Deleted from Hydrus: {preview}", file=sys.stderr)
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
except Exception:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('' if len(hash_hex) > 12 else '')
title_str = str(title_val).strip() if title_val else ""
if reason:
ctx.emit(f"Deleted {preview} (reason: {reason}).")
if title_str:
ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex} (reason: {reason}).")
else:
ctx.emit(f"Deleted {preview}.")
ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex} (reason: {reason}).")
else:
if title_str:
ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}.")
else:
ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex}.")
if hydrus_deleted or local_deleted:
return True
@@ -178,7 +191,7 @@ class Delete_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute delete-file command."""
if should_show_help(args):
if sh.should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0

View File

@@ -7,16 +7,16 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
get_field,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
get_field = sh.get_field
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file

View File

@@ -10,7 +10,16 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_hash, normalize_result_input, get_field, should_show_help
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_hash = sh.normalize_hash
normalize_result_input = sh.normalize_result_input
get_field = sh.get_field
should_show_help = sh.should_show_help
from API.folder import API_folder_store
from Store import Store
from config import get_local_storage_path

View File

@@ -7,7 +7,15 @@ import sys
import models
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
should_show_help = sh.should_show_help
get_field = sh.get_field
from SYS.logger import debug, log
from Store import Store

View File

@@ -4,7 +4,16 @@ from typing import Any, Dict, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
sh.parse_cmdlet_args,
sh.get_field,
sh.normalize_hash,
)
from SYS.logger import log
from Store import Store

View File

@@ -17,15 +17,15 @@ from SYS.download import DownloadError, _download_direct_file
from SYS.logger import log, debug
import pipeline as pipeline_context
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
parse_cmdlet_args,
register_url_with_local_library,
coerce_to_pipe_object,
get_field,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field
class Download_File(Cmdlet):
@@ -251,6 +251,13 @@ class Download_File(Cmdlet):
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
low = target.lower()
if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low):
log("[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", file=sys.stderr)
continue
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
file_path = None

View File

@@ -38,7 +38,18 @@ from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLog
import pipeline as pipeline_context
from result_table import ResultTable
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object
from tool.ytdlp import YtDlpTool
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field
# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
@@ -62,6 +73,136 @@ _EXTRACTOR_CACHE: List[Any] | None = None
_YTDLP_PROGRESS_BAR = ProgressBar()
_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc")
def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]:
"""Format yt-dlp chapter metadata into a stable, note-friendly text.
Output is one chapter per line, e.g.:
00:00 Intro
01:23-02:10 Topic name
"""
try:
chapters = info.get("chapters")
except Exception:
chapters = None
if not isinstance(chapters, list) or not chapters:
return None
rows: List[tuple[int, Optional[int], str]] = []
max_t = 0
for ch in chapters:
if not isinstance(ch, dict):
continue
start_raw = ch.get("start_time")
end_raw = ch.get("end_time")
title_raw = ch.get("title") or ch.get("name") or ch.get("chapter")
try:
start_s = int(float(start_raw))
except Exception:
continue
end_s: Optional[int] = None
try:
if end_raw is not None:
end_s = int(float(end_raw))
except Exception:
end_s = None
title = str(title_raw).strip() if title_raw is not None else ""
rows.append((start_s, end_s, title))
try:
max_t = max(max_t, start_s, end_s or 0)
except Exception:
max_t = max(max_t, start_s)
if not rows:
return None
force_hours = bool(max_t >= 3600)
def _tc(seconds: int) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
if force_hours:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
lines: List[str] = []
for start_s, end_s, title in sorted(rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2])):
if end_s is not None and end_s > start_s:
prefix = f"{_tc(start_s)}-{_tc(end_s)}"
else:
prefix = _tc(start_s)
line = f"{prefix} {title}".strip()
if line:
lines.append(line)
text = "\n".join(lines).strip()
return text or None
def _best_subtitle_sidecar(media_path: Path) -> Optional[Path]:
    """Locate the most likely subtitle sidecar next to a downloaded media file.

    Candidates share the media file's stem and carry a known subtitle
    extension. Preference order: English-tagged names first, then by
    extension (.vtt best), then most recently modified, then name.
    Returns None when nothing matches or on any filesystem error.
    """
    try:
        stem = media_path.stem
        if not stem:
            return None
        found: List[Path] = []
        for candidate in media_path.parent.glob(stem + ".*"):
            try:
                if not candidate.is_file():
                    continue
            except Exception:
                continue
            if candidate.suffix.lower() in _SUBTITLE_EXTS:
                found.append(candidate)
        if not found:
            return None

        ext_priority = {".vtt": 0, ".srt": 1, ".ass": 2, ".ssa": 3, ".lrc": 4}

        def _score(path: Path) -> tuple[int, int, float, str]:
            lower_name = path.name.lower()
            suffix = path.suffix.lower()
            english = 0 if (".en." in lower_name or lower_name.endswith(".en" + suffix)) else 1
            try:
                modified = float(path.stat().st_mtime)
            except Exception:
                modified = 0.0
            # Negate mtime so newer files rank earlier under ascending order.
            return (english, ext_priority.get(suffix, 9), -modified, lower_name)

        return min(found, key=_score)
    except Exception:
        return None
def _read_text_file(path: Path, *, max_bytes: int = 1_500_000) -> Optional[str]:
try:
data = path.read_bytes()
except Exception:
return None
if not data:
return None
if len(data) > max_bytes:
data = data[:max_bytes]
try:
return data.decode("utf-8", errors="replace")
except Exception:
try:
return data.decode(errors="replace")
except Exception:
return None
def _ensure_yt_dlp_ready() -> None:
if yt_dlp is not None:
return
@@ -100,16 +241,26 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
ydl_opts["noplaylist"] = True
if playlist_items:
ydl_opts["playlist_items"] = playlist_items
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
debug(f"Fetching format list for: {url}")
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(url, download=False)
formats = info.get("formats", [])
if not formats:
if not isinstance(info, dict):
log("No formats available", file=sys.stderr)
return None
result_formats = []
formats = info.get("formats") or []
if not isinstance(formats, list) or not formats:
log("No formats available", file=sys.stderr)
return None
result_formats: List[Dict[str, Any]] = []
for fmt in formats:
result_formats.append({
if not isinstance(fmt, dict):
continue
result_formats.append(
{
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
@@ -122,9 +273,11 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
"filesize": fmt.get("filesize"),
"abr": fmt.get("abr"),
"tbr": fmt.get("tbr"),
})
}
)
debug(f"Found {len(result_formats)} available formats")
return result_formats
return result_formats or None
except Exception as e:
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
return None
@@ -215,6 +368,31 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd = ["yt-dlp"]
if ytdl_options.get("format"):
cmd.extend(["-f", ytdl_options["format"]])
if ytdl_options.get("merge_output_format"):
cmd.extend(["--merge-output-format", str(ytdl_options["merge_output_format"])])
# For CLI downloads, infer chapter/metadata embedding from either legacy flags
# or explicit FFmpegMetadata postprocessor entries.
postprocessors = ytdl_options.get("postprocessors")
want_add_metadata = bool(ytdl_options.get("addmetadata"))
want_embed_chapters = bool(ytdl_options.get("embedchapters"))
if isinstance(postprocessors, list):
for pp in postprocessors:
if not isinstance(pp, dict):
continue
if str(pp.get("key") or "") == "FFmpegMetadata":
want_add_metadata = True
if bool(pp.get("add_chapters", True)):
want_embed_chapters = True
if want_add_metadata:
cmd.append("--add-metadata")
if want_embed_chapters:
cmd.append("--embed-chapters")
if ytdl_options.get("writesubtitles"):
cmd.append("--write-sub")
cmd.append("--write-auto-sub")
cmd.extend(["--sub-format", "vtt"])
if ytdl_options.get("force_keyframes_at_cuts"):
cmd.extend(["--force-keyframes-at-cuts"]) if ytdl_options.get("force_keyframes_at_cuts") else None
cmd.extend(["-o", section_outtmpl])
@@ -258,11 +436,6 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
base_options["cookiefile"] = global_cookies
if opts.no_playlist:
base_options["noplaylist"] = True
@@ -274,6 +447,37 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]
# Optional yt-dlp features
if getattr(opts, "embed_chapters", False):
# Prefer explicit FFmpegMetadata PP so chapter embedding runs even when
# we already specified other postprocessors (e.g. FFmpegExtractAudio).
pps = base_options.get("postprocessors")
if not isinstance(pps, list):
pps = []
already_has_metadata = any(
isinstance(pp, dict) and str(pp.get("key") or "") == "FFmpegMetadata" for pp in pps
)
if not already_has_metadata:
pps.append(
{
"key": "FFmpegMetadata",
"add_metadata": True,
"add_chapters": True,
"add_infojson": "if_exists",
}
)
base_options["postprocessors"] = pps
# Chapter embedding is most reliable in mkv/mp4 containers.
# When merging separate video+audio streams, prefer mkv so mpv sees chapters.
if opts.mode != "audio":
base_options.setdefault("merge_output_format", "mkv")
if getattr(opts, "write_sub", False):
base_options["writesubtitles"] = True
base_options["writeautomaticsub"] = True
base_options["subtitlesformat"] = "vtt"
if opts.clip_sections:
sections: List[str] = []
@@ -410,13 +614,27 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
for link in soup.find_all('a'):
href = link.get('href')
if href and 'get.php' in href:
return urljoin(libgen_url, href)
try:
from lxml import html as lxml_html
except ImportError:
lxml_html = None
if lxml_html is not None:
doc = lxml_html.fromstring(response.content)
for a in doc.xpath("//a[@href]"):
href = str(a.get("href") or "").strip()
if href and "get.php" in href.lower():
return urljoin(final_url, href)
else:
for m in re.finditer(
r"href=[\"\']([^\"\']+)[\"\']",
response.text or "",
flags=re.IGNORECASE,
):
href = str(m.group(1) or "").strip()
if href and "get.php" in href.lower():
return urljoin(final_url, href)
except Exception:
pass
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
@@ -648,7 +866,7 @@ def _download_direct_file(
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, cookiefile: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
@@ -686,12 +904,8 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"noprogress": True, # No progress bars
}
# Add cookies if available (lazy import to avoid circular dependency)
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
ydl_opts["cookiefile"] = global_cookies
if cookiefile:
ydl_opts["cookiefile"] = str(cookiefile)
# Add no_playlist option if specified
if no_playlist:
@@ -807,7 +1021,14 @@ def download_media(
debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
probe_result = {"url": opts.url} # Minimal probe result
else:
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15)
probe_cookiefile = None
try:
if opts.cookies_path and opts.cookies_path.is_file():
probe_cookiefile = str(opts.cookies_path)
except Exception:
probe_cookiefile = None
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile)
if probe_result is None:
if not opts.quiet:
@@ -1182,6 +1403,8 @@ class Download_Media(Cmdlet):
try:
debug("Starting download-media")
ytdlp_tool = YtDlpTool(config)
# Parse arguments
parsed = parse_cmdlet_args(args, self)
@@ -1192,7 +1415,6 @@ class Download_Media(Cmdlet):
# If no url provided via args, try to extract from piped result
if not raw_url and result:
from ._shared import get_field
# Handle single result or list of results
results_to_check = result if isinstance(result, list) else [result]
for item in results_to_check:
@@ -1226,6 +1448,10 @@ class Download_Media(Cmdlet):
# Get other options
clip_spec = parsed.get("clip")
# Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them.
embed_chapters = True
write_sub = True
mode = "audio" if parsed.get("audio") else "video"
# Parse clip range(s) if specified
@@ -1379,7 +1605,14 @@ class Download_Media(Cmdlet):
if playlist_items:
return str(requested_url)
try:
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15)
cf = None
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
if isinstance(pr, dict):
for key in ("webpage_url", "original_url", "url", "requested_url"):
value = pr.get(key)
@@ -1458,7 +1691,14 @@ class Download_Media(Cmdlet):
- selected_urls: Optional[List[str]] (expanded per-entry urls when available)
"""
try:
pr = probe_url(url, no_playlist=False, timeout_seconds=15)
cf = None
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
except Exception:
pr = None
if not isinstance(pr, dict):
@@ -1686,6 +1926,15 @@ class Download_Media(Cmdlet):
filesize = fmt.get("filesize")
format_id = fmt.get("format_id", "")
# If the chosen format is video-only (no audio stream), automatically
# request best audio too so the resulting file has sound.
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
# Format size
size_str = ""
if filesize:
@@ -1729,9 +1978,9 @@ class Download_Media(Cmdlet):
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": format_id,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", format_id]
"_selection_args": ["-format", selection_format_id]
}
# Add to results list and table (don't emit - formats should wait for @N selection)
@@ -1778,23 +2027,57 @@ class Download_Media(Cmdlet):
actual_format = playlist_items
actual_playlist_items = None
# Auto-pick best audio format when -audio is used and no explicit format is given.
# For -audio, default to yt-dlp's built-in bestaudio selector.
# This should *not* require interactive format picking.
if mode == "audio" and not actual_format:
chosen = None
actual_format = "bestaudio"
# If no explicit format is provided for video mode, allow a config override.
if mode == "video" and not actual_format:
configured = (ytdlp_tool.default_format("video") or "").strip()
if configured and configured != "bestvideo+bestaudio/best":
actual_format = configured
# If a single format id was chosen and it is video-only, auto-merge best audio.
if (
actual_format
and isinstance(actual_format, str)
and mode != "audio"
and "+" not in actual_format
and "/" not in actual_format
and "[" not in actual_format
and actual_format not in {"best", "bv", "ba", "b"}
):
try:
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
chosen = _pick_best_audio_format_id(formats)
actual_format = chosen or "bestaudio/best"
fmt_match = next(
(f for f in formats if str(f.get("format_id", "")) == actual_format),
None,
)
if fmt_match:
vcodec = str(fmt_match.get("vcodec", "none"))
acodec = str(fmt_match.get("acodec", "none"))
if vcodec != "none" and acodec == "none":
debug(
f"Selected video-only format {actual_format}; using {actual_format}+ba for audio"
)
actual_format = f"{actual_format}+ba"
except Exception:
pass
opts = DownloadOptions(
url=url,
mode=mode,
output_dir=final_output_dir,
ytdl_format=actual_format,
cookies_path=ytdlp_tool.resolve_cookiefile(),
clip_sections=clip_sections_spec,
playlist_items=actual_playlist_items,
quiet=quiet_mode,
no_playlist=False,
embed_chapters=embed_chapters,
write_sub=write_sub,
)
# Use timeout wrapper to prevent hanging
@@ -1838,7 +2121,40 @@ class Download_Media(Cmdlet):
# Build PipeObjects first so we can attach cross-clip relationships.
pipe_objects: List[Dict[str, Any]] = []
for downloaded in results_to_emit:
pipe_objects.append(self._build_pipe_object(downloaded, url, opts))
po = self._build_pipe_object(downloaded, url, opts)
# Attach chapter timestamps for downstream consumers (e.g., mpv scripts)
# even if container embedding fails.
try:
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
except Exception:
info = {}
chapters_text = _format_chapters_note(info) if embed_chapters else None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
if write_sub:
try:
media_path = Path(str(po.get("path") or ""))
except Exception:
media_path = None
if media_path is not None and media_path.exists() and media_path.is_file():
sub_path = _best_subtitle_sidecar(media_path)
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
pipe_objects.append(po)
# If this is a clip download, decorate titles/tags so the title: tag is clip-based.
# Relationship tags are only added when multiple clips exist.
@@ -1868,6 +2184,95 @@ class Download_Media(Cmdlet):
debug("✓ Downloaded and emitted")
except DownloadError as e:
# Special-case yt-dlp format errors: show a selectable format list table so
# the user can pick a working format_id and continue the pipeline via @N.
cause = getattr(e, "__cause__", None)
detail = ""
try:
detail = str(cause or "")
except Exception:
detail = ""
if "requested format is not available" in (detail or "").lower() and mode != "audio":
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
formats_to_show = formats
table = ResultTable()
table.title = f"Available formats for {url}"
table.set_source_command("download-media", [str(a) for a in (args or [])])
results_list: List[Dict[str, Any]] = []
for idx, fmt in enumerate(formats_to_show, 1):
resolution = fmt.get("resolution", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
size_str = ""
if filesize:
try:
size_mb = float(filesize) / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
desc_parts.append(str(resolution))
if ext:
desc_parts.append(str(ext).upper())
if vcodec != "none":
desc_parts.append(f"v:{vcodec}")
if acodec != "none":
desc_parts.append(f"a:{acodec}")
if size_str:
desc_parts.append(size_str)
format_desc = " | ".join(desc_parts)
format_dict: Dict[str, Any] = {
"table": "download-media",
"title": f"Format {format_id}",
"url": url,
"target": url,
"detail": format_desc,
"media_kind": "format",
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", selection_format_id],
}
results_list.append(format_dict)
table.add_result(format_dict)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
# Returning 0 with no emits lets the CLI pause the pipeline for @N selection.
log("Requested format is not available; select a working format with @N", file=sys.stderr)
return 0
log(f"Download failed for {url}: {e}", file=sys.stderr)
except Exception as e:
log(f"Error processing {url}: {e}", file=sys.stderr)

View File

@@ -15,9 +15,9 @@ from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from . import _shared as sh
class Download_Torrent(Cmdlet):
class Download_Torrent(sh.Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
def __init__(self) -> None:
@@ -27,10 +27,10 @@ class Download_Torrent(Cmdlet):
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
sh.CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
sh.CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
sh.CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
sh.CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
@@ -38,7 +38,7 @@ class Download_Torrent(Cmdlet):
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
magnet_args = parsed.get("magnet", [])
output_dir = Path(parsed.get("output") or Path.home() / "Downloads")
wait_timeout = int(float(parsed.get("wait", 600)))

View File

@@ -9,13 +9,13 @@ import subprocess
import webbrowser
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
from SYS.logger import log, debug
from Store import Store
from config import resolve_output_dir
class Get_File(Cmdlet):
class Get_File(sh.Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
@@ -25,10 +25,10 @@ class Get_File(Cmdlet):
summary="Export file to local path",
usage="@1 | get-file -path C:\\Downloads",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
SharedArgs.PATH,
CmdletArg("name", description="Output filename (default: from metadata title)"),
sh.SharedArgs.HASH,
sh.SharedArgs.STORE,
sh.SharedArgs.PATH,
sh.CmdletArg("name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
@@ -42,12 +42,12 @@ class Get_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend."""
debug(f"[get-file] run() called with result type: {type(result)}")
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
debug(f"[get-file] parsed args: {parsed}")
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
@@ -62,7 +62,7 @@ class Get_File(Cmdlet):
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
@@ -84,9 +84,9 @@ class Get_File(Cmdlet):
def resolve_display_title() -> str:
candidates = [
get_field(result, "title"),
get_field(result, "name"),
get_field(result, "filename"),
sh.get_field(result, "title"),
sh.get_field(result, "name"),
sh.get_field(result, "filename"),
(metadata.get("title") if isinstance(metadata, dict) else None),
(metadata.get("name") if isinstance(metadata, dict) else None),
(metadata.get("filename") if isinstance(metadata, dict) else None),

View File

@@ -7,7 +7,13 @@ import sys
from SYS.logger import log
from pathlib import Path
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
import pipeline as ctx
from result_table import ResultTable
@@ -74,9 +80,15 @@ class Get_Metadata(Cmdlet):
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
if isinstance(size_bytes, int):
size_int: Optional[int] = None
if size_bytes is not None:
try:
size_mb = int(size_bytes / (1024 * 1024))
size_int = int(size_bytes)
except Exception:
size_int = None
if isinstance(size_int, int):
try:
size_mb = int(size_int / (1024 * 1024))
except Exception:
size_mb = None
@@ -105,7 +117,7 @@ class Get_Metadata(Cmdlet):
"path": path,
"store": store,
"mime": mime,
"size_bytes": size_bytes,
"size_bytes": size_int,
"duration_seconds": dur_int,
"pages": pages_int,
"imported_ts": imported_ts,
@@ -237,8 +249,8 @@ class Get_Metadata(Cmdlet):
pages=pages,
)
table_title = title
table = ResultTable(table_title).init_command("get-metadata", list(args))
table_title = f"get-metadata: {title}" if title else "get-metadata"
table = ResultTable(table_title).init_command(table_title, "get-metadata", list(args))
self._add_table_body_row(table, row)
ctx.set_last_result_table_overlay(table, [row], row)
ctx.emit(row)

View File

@@ -7,15 +7,15 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file

View File

@@ -10,7 +10,17 @@ from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
fmt_bytes = sh.fmt_bytes
get_hash_for_operation = sh.get_hash_for_operation
fetch_hydrus_metadata = sh.fetch_hydrus_metadata
should_show_help = sh.should_show_help
get_field = sh.get_field
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
@@ -224,13 +234,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
try:
client = None
store_label = "hydrus"
backend_obj = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
store_label = str(store_name)
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
backend_obj = store[str(store_name)]
candidate = getattr(backend_obj, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_relationships"):
client = candidate
except Exception:
@@ -241,6 +252,74 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
else:
client = hydrus_wrapper.get_client(config)
def _resolve_related_title(rel_hash: str) -> str:
"""Best-effort resolve a Hydrus hash to a human title.
Preference order:
- title: tag from the backend (fast path)
- Hydrus metadata tags via fetch_hydrus_metadata
- fallback to short hash
"""
h = normalize_hash(rel_hash)
if not h:
return str(rel_hash)
# Prefer backend tag extraction when available.
if backend_obj is not None and hasattr(backend_obj, "get_tag"):
try:
tag_result = backend_obj.get_tag(h)
tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
if isinstance(tags, list):
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
val = t.split(":", 1)[1].strip()
if val:
return val
except Exception:
pass
# Fallback: fetch minimal metadata and scan for a title tag.
try:
meta, _ = fetch_hydrus_metadata(
config,
h,
store_name=store_label if store_name else None,
hydrus_client=client,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
)
if isinstance(meta, dict):
tags_payload = meta.get("tags")
tag_candidates: list[str] = []
if isinstance(tags_payload, dict):
for svc_data in tags_payload.values():
if not isinstance(svc_data, dict):
continue
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
for group in storage.values():
if isinstance(group, list):
tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
display = svc_data.get("display_tags")
if isinstance(display, list):
tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
flat = meta.get("tags_flat")
if isinstance(flat, list):
tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
for t in tag_candidates:
if isinstance(t, str) and t.lower().startswith("title:"):
val = t.split(":", 1)[1].strip()
if val:
return val
except Exception:
pass
return h[:16] + "..."
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
@@ -274,7 +353,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": king_hash,
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
})
@@ -292,7 +371,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm, # Can't resolve title easily without another API call
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
@@ -304,7 +383,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})

View File

@@ -27,7 +27,15 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
import pipeline as ctx
from API import HydrusNetwork
from API.folder import read_sidecar, write_sidecar, find_sidecar, API_folder_store
from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
from . import _shared as sh
normalize_hash = sh.normalize_hash
looks_like_hash = sh.looks_like_hash
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
from config import get_local_storage_path

View File

@@ -5,7 +5,15 @@ from typing import Any, Dict, List, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
sh.Cmdlet,
sh.SharedArgs,
sh.parse_cmdlet_args,
sh.get_field,
sh.normalize_hash,
)
from SYS.logger import log
from Store import Store

View File

@@ -12,17 +12,17 @@ import re as _re
from config import resolve_output_dir
from ._shared import (
Cmdlet,
CmdletArg,
create_pipe_object_result,
get_field,
get_pipe_object_hash,
get_pipe_object_path,
normalize_result_input,
parse_cmdlet_args,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
create_pipe_object_result = sh.create_pipe_object_result
get_field = sh.get_field
get_pipe_object_hash = sh.get_pipe_object_hash
get_pipe_object_path = sh.get_pipe_object_path
normalize_result_input = sh.normalize_result_input
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help
import pipeline as ctx

View File

@@ -20,7 +20,16 @@ from urllib.parse import urlsplit, quote, urljoin
from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
create_pipe_object_result = sh.create_pipe_object_result
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
get_field = sh.get_field
parse_cmdlet_args = sh.parse_cmdlet_args
import pipeline as pipeline_context
# ============================================================================
@@ -33,20 +42,7 @@ import pipeline as pipeline_context
# Playwright & Screenshot Dependencies
# ============================================================================
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
sync_playwright,
)
HAS_PLAYWRIGHT = True
except Exception:
HAS_PLAYWRIGHT = False
PlaywrightTimeoutError = TimeoutError # type: ignore
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
raise RuntimeError(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
)
from tool.playwright import HAS_PLAYWRIGHT, PlaywrightTimeoutError, PlaywrightTool
try:
from config import resolve_output_dir
@@ -128,6 +124,7 @@ class ScreenshotOptions:
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
selector_timeout_ms: int = 10_000
playwright_tool: Optional[PlaywrightTool] = None
@dataclass(slots=True)
@@ -324,33 +321,22 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
playwright = None
browser = None
context = None
try:
debug("Starting Playwright...", flush=True)
playwright = sync_playwright().start()
log("Launching Chromium browser...", flush=True)
tool = options.playwright_tool or PlaywrightTool({})
tool.debug_dump()
log("Launching browser...", flush=True)
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
browser = playwright.chromium.launch(
headless=headless,
args=["--disable-blink-features=AutomationControlled"],
)
log("Creating browser context...", flush=True)
context = browser.new_context(
user_agent=USER_AGENT,
viewport=DEFAULT_VIEWPORT,
ignore_https_errors=True,
)
page = context.new_page()
with tool.open_page(headless=headless) as page:
log(f"Navigating to {options.url}...", flush=True)
try:
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
tool.goto(page, options.url)
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
@@ -448,18 +434,6 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
except Exception as exc:
debug(f"[_capture] Exception: {exc}")
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
finally:
log("Cleaning up browser resources...", flush=True)
with contextlib.suppress(Exception):
if context is not None:
context.close()
with contextlib.suppress(Exception):
if browser is not None:
browser.close()
with contextlib.suppress(Exception):
if playwright is not None:
playwright.stop()
log("Cleanup complete", flush=True)
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
@@ -511,8 +485,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
from ._shared import parse_cmdlet_args
debug(f"[_run] screen-shot invoked with args: {args}")
# Help check
@@ -534,6 +506,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
if not format_value:
# Default format can be set via config.conf tool block:
# [tool=playwright]
# format="pdf"
try:
tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
pw_cfg = tool_cfg.get("playwright") if isinstance(tool_cfg, dict) else None
if isinstance(pw_cfg, dict):
format_value = pw_cfg.get("format")
except Exception:
pass
if not format_value:
format_value = "png"
storage_value = parsed.get("storage")
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
@@ -669,6 +654,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
prefer_platform_target=False,
wait_for_article=False,
full_page=True,
playwright_tool=PlaywrightTool(config),
)
screenshot_result = _capture_screenshot(options)

View File

@@ -10,7 +10,13 @@ import importlib
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from ._shared import Cmdlet, CmdletArg, should_show_help
from . import _shared as sh
Cmdlet, CmdletArg, should_show_help = (
sh.Cmdlet,
sh.CmdletArg,
sh.should_show_help,
)
import pipeline as ctx
# Optional dependencies

View File

@@ -10,7 +10,17 @@ import sys
from SYS.logger import log, debug
from ._shared import Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
sh.get_field,
sh.should_show_help,
sh.normalize_hash,
sh.first_title_tag,
)
import pipeline as ctx
@@ -209,6 +219,10 @@ class Search_Store(Cmdlet):
table_title += f" [{storage_backend}]"
table = ResultTable(table_title)
try:
table.set_source_command("search-store", list(args_list))
except Exception:
pass
if hash_query:
try:
table.set_preserve_order(True)
@@ -309,6 +323,11 @@ class Search_Store(Cmdlet):
ext_val = Path(path_str).suffix
except Exception:
ext_val = None
if not ext_val and title:
try:
ext_val = Path(str(title)).suffix
except Exception:
ext_val = None
size_bytes = meta_obj.get("size")
if size_bytes is None:
@@ -333,6 +352,20 @@ class Search_Store(Cmdlet):
ctx.emit(payload)
if found_any:
# Title should reflect the command, query, and only stores present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
@@ -377,28 +410,6 @@ class Search_Store(Cmdlet):
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
def _format_storage_label(name: str) -> str:
clean = str(name or "").strip()
if not clean:
return "Unknown"
return clean.replace("_", " ").title()
storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
for item in results or []:
store = get_field(item, "store")
if not store:
continue
key = str(store).lower()
if key not in storage_counts:
storage_counts[key] = 0
storage_counts[key] += 1
if storage_counts or query:
display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
summary_line = table.set_storage_summary(display_counts, query, inline=True)
if summary_line:
table.title = summary_line
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
@@ -428,6 +439,20 @@ class Search_Store(Cmdlet):
results_list.append(normalized)
ctx.emit(normalized)
# Title should reflect the command, query, and only stores present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:

View File

@@ -11,14 +11,14 @@ import re
from SYS.logger import log, debug
from SYS.utils import sha256_file
from ._shared import (
Cmdlet,
CmdletArg,
parse_cmdlet_args,
normalize_result_input,
extract_tag_from_result,
extract_title_from_result
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
import pipeline as ctx
CMDLET = Cmdlet(

View File

@@ -26,9 +26,9 @@ def ensure_registry_loaded() -> None:
def _normalize_mod_name(mod_name: str) -> str:
"""Normalize a command/module name for import resolution."""
normalized = (mod_name or "").strip()
if normalized.startswith('.'):
normalized = normalized.lstrip('.')
normalized = normalized.replace('-', '_')
if normalized.startswith("."):
normalized = normalized.lstrip(".")
normalized = normalized.replace("-", "_")
return normalized
@@ -83,7 +83,7 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
if data is None:
try:
reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
reg_fn = (REGISTRY or {}).get(cmd_name.replace("_", "-").lower())
if reg_fn:
owner_mod = getattr(reg_fn, "__module__", "")
if owner_mod:
@@ -186,8 +186,6 @@ def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
if not meta:
return []
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
# completions feel stable and predictable.
flags: List[str] = []
seen: set[str] = set()

View File

@@ -135,7 +135,7 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None:
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from cmdlet import catalog as _catalog
import cmdlet_catalog as _catalog
CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names())
metadata = _catalog.list_cmdlet_metadata()

View File

@@ -16,7 +16,7 @@ from models import PipeObject
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url
from hydrus_health_check import get_cookies_file_path
_ALLDEBRID_UNLOCK_CACHE: Dict[str, str] = {}
@@ -372,12 +372,18 @@ def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]:
def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]:
"""Compose ytdl-raw-options string including cookies and optional Hydrus header."""
opts: List[str] = []
cookies_path = None
try:
cookies_path = get_cookies_file_path()
from tool.ytdlp import YtDlpTool
cookiefile = YtDlpTool(config or {}).resolve_cookiefile()
if cookiefile is not None:
cookies_path = str(cookiefile)
except Exception:
cookies_path = None
if cookies_path:
opts.append(f"cookies={cookies_path.replace('\\', '/')}")
opts.append(f"cookies={cookies_path.replace('\\', '/')}" )
else:
opts.append("cookies-from-browser=chrome")
if hydrus_header:
@@ -407,10 +413,18 @@ def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool:
return True
return False
def _ensure_ytdl_cookies() -> None:
def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
"""Ensure yt-dlp options are set correctly for this session."""
from pathlib import Path
cookies_path = get_cookies_file_path()
cookies_path = None
try:
from tool.ytdlp import YtDlpTool
cookiefile = YtDlpTool(config or {}).resolve_cookiefile()
if cookiefile is not None:
cookies_path = str(cookiefile)
except Exception:
cookies_path = None
if cookies_path:
# Check if file exists and has content (use forward slashes for path checking)
check_path = cookies_path.replace('\\', '/')
@@ -635,7 +649,7 @@ def _queue_items(
pass
# Just verify cookies are configured, don't try to set via IPC
_ensure_ytdl_cookies()
_ensure_ytdl_cookies(config)
hydrus_header = _build_hydrus_header(config or {})
ytdl_opts = _build_ytdl_options(config, hydrus_header)
@@ -1426,7 +1440,15 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
hydrus_header = _build_hydrus_header(config or {})
ytdl_opts = _build_ytdl_options(config, hydrus_header)
cookies_path = get_cookies_file_path()
cookies_path = None
try:
from tool.ytdlp import YtDlpTool
cookiefile = YtDlpTool(config or {}).resolve_cookiefile()
if cookiefile is not None:
cookies_path = str(cookiefile)
except Exception:
cookies_path = None
if cookies_path:
debug(f"Starting MPV with cookies file: {cookies_path.replace('\\', '/')}")
else:

View File

@@ -1,11 +1,5 @@
"""Unified configuration helpers.
Configuration is defined exclusively via the modular `.conf` format.
- Required: `temp`
- Optional: stores, providers, and other settings
- Modular: optional fragments in `config.d/*.conf` are merged in lexicographic order
"""
"""
from __future__ import annotations
@@ -130,6 +124,21 @@ def _apply_conf_block(config: Dict[str, Any], kind: str, subtype: str, block: Di
provider[provider_name] = dict(block)
return
if kind_l == "tool":
tool_name = str(subtype).strip().lower()
if not tool_name:
return
tool = config.setdefault("tool", {})
if not isinstance(tool, dict):
config["tool"] = {}
tool = config["tool"]
existing = tool.get(tool_name)
if isinstance(existing, dict):
_merge_dict_inplace(existing, block)
else:
tool[tool_name] = dict(block)
return
def parse_conf_text(text: str, *, base: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Parse a lightweight .conf format into the app's config dict.
@@ -227,7 +236,7 @@ def _serialize_conf(config: Dict[str, Any]) -> str:
# Top-level scalars first
for key in sorted(config.keys()):
if key in {"store", "provider"}:
if key in {"store", "provider", "tool"}:
continue
value = config.get(key)
if isinstance(value, dict):
@@ -263,6 +272,18 @@ def _serialize_conf(config: Dict[str, Any]) -> str:
for k in sorted(block.keys()):
lines.append(f"{k}={_format_conf_value(block.get(k))}")
# Tool blocks
tool = config.get("tool")
if isinstance(tool, dict):
for name in sorted(tool.keys()):
block = tool.get(name)
if not isinstance(block, dict):
continue
lines.append("")
lines.append(f"[tool={name}]")
for k in sorted(block.keys()):
lines.append(f"{k}={_format_conf_value(block.get(k))}")
return "\n".join(lines).rstrip() + "\n"
@@ -510,12 +531,43 @@ def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[
def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = None) -> Optional[Path]:
value = config.get("cookies")
if value:
# Support both legacy top-level `cookies=...` and the modular conf style:
# [tool=ytdlp]
# cookies="C:\\path\\cookies.txt"
values: list[Any] = []
try:
values.append(config.get("cookies"))
except Exception:
pass
try:
tool = config.get("tool")
if isinstance(tool, dict):
ytdlp = tool.get("ytdlp")
if isinstance(ytdlp, dict):
values.append(ytdlp.get("cookies"))
values.append(ytdlp.get("cookiefile"))
except Exception:
pass
try:
ytdlp_block = config.get("ytdlp")
if isinstance(ytdlp_block, dict):
values.append(ytdlp_block.get("cookies"))
values.append(ytdlp_block.get("cookiefile"))
except Exception:
pass
base_dir = script_dir or SCRIPT_DIR
for value in values:
if not value:
continue
candidate = Path(str(value)).expanduser()
if not candidate.is_absolute():
candidate = (base_dir / candidate).expanduser()
if candidate.is_file():
return candidate
base_dir = script_dir or SCRIPT_DIR
default_path = base_dir / "cookies.txt"
if default_path.is_file():
return default_path

View File

@@ -1,43 +0,0 @@
"""Cookies availability helpers.
This module is intentionally limited to cookie-file resolution used by yt-dlp.
Other service availability checks live in their owning store/provider objects.
"""
import sys
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
from SYS.logger import debug
# Global state for Cookies availability
_COOKIES_FILE_PATH: Optional[str] = None
def initialize_cookies_check(config: Optional[Dict[str, Any]] = None, emit_debug: bool = True) -> Tuple[bool, str]:
    """Resolve the cookies file path from config, falling back to cookies.txt in the app root.

    Caches the resolved path in the module-level ``_COOKIES_FILE_PATH`` so
    ``get_cookies_file_path`` can serve it later.

    Returns:
        (found, detail_message) — detail is the path when found, "Not found" otherwise.
    """
    global _COOKIES_FILE_PATH
    resolved = None
    try:
        from config import resolve_cookies_path
        resolved = resolve_cookies_path(config or {}, script_dir=Path(__file__).parent)
    except Exception:
        resolved = None
    # Guard clause: anything short of an existing file counts as "not found".
    if not (resolved and resolved.exists()):
        _COOKIES_FILE_PATH = None
        return False, "Not found"
    _COOKIES_FILE_PATH = str(resolved)
    if emit_debug:
        debug(f"Cookies: ENABLED - Found cookies file", file=sys.stderr)
    return True, str(resolved)
def get_cookies_file_path() -> Optional[str]:
    """Return the cached cookies.txt path set by initialize_cookies_check, or None.

    Read-only accessor; call initialize_cookies_check first to populate the cache.
    """
    return _COOKIES_FILE_PATH

View File

@@ -348,6 +348,8 @@ class DownloadOptions:
playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8")
no_playlist: bool = False # If True, pass --no-playlist to yt-dlp
quiet: bool = False # If True, suppress all console output (progress, debug logs)
embed_chapters: bool = False # If True, pass yt-dlp --embed-chapters / embedchapters
write_sub: bool = False # If True, download subtitles (writesubtitles/writeautomaticsub)
class SendFunc(Protocol):

View File

@@ -35,7 +35,7 @@ dependencies = [
"textual>=0.30.0",
# Media processing and downloading
"yt-dlp>=2023.11.0",
"yt-dlp[default]>=2023.11.0",
"yt-dlp-ejs", # EJS challenge solver scripts for YouTube JavaScript challenges
"requests>=2.31.0",
"httpx>=0.25.0",
@@ -43,7 +43,6 @@ dependencies = [
# Document and data handling
"pypdf>=3.0.0",
"img2pdf>=0.6.0",
"mutagen>=1.46.0",
"cbor2>=4.0",
@@ -53,7 +52,6 @@ dependencies = [
# Metadata extraction and processing
"musicbrainzngs>=0.7.0",
"beautifulsoup4>=4.12.0",
"lxml>=4.9.0",
# Advanced searching and libraries

View File

@@ -4,14 +4,13 @@ prompt-toolkit>=3.0.0
textual>=0.30.0
# Media processing and downloading
yt-dlp>=2023.11.0
yt-dlp[default]>=2023.11.0
requests>=2.31.0
httpx>=0.25.0
ffmpeg-python>=0.2.0
# Document and data handling
pypdf>=3.0.0
img2pdf>=0.6.0
mutagen>=1.46.0
cbor2>=4.0
@@ -21,7 +20,6 @@ python-bidi>=0.4.2
# Metadata extraction and processing
musicbrainzngs>=0.7.0
beautifulsoup4>=4.12.0
lxml>=4.9.0
# Advanced searching and libraries

View File

@@ -1,336 +0,0 @@
import requests
import random, string
from concurrent import futures
from tqdm import tqdm
import time
from datetime import datetime
import argparse
import os
import sys
import shutil
import json
import re
import base64
import hashlib
from Crypto.Cipher import AES
from Crypto.Util import Counter
def display_error(response, message):
    """Print a diagnostic message plus the raw HTTP response, then terminate the program."""
    for part in (message, response, response.text):
        print(part)
    exit()
def get_book_infos(session, url):
    """Scrape a book's title, per-page image links and metadata from its details page.

    Args:
        session: authenticated requests.Session.
        url: an https://archive.org/details/<id> page URL.

    Returns:
        (title, links, metadata) on success; exits the process when fewer than
        two page links are found.
    """
    r = session.get(url).text
    # The details page embeds a JSON blob; extract the BookReader info URL from it.
    # NOTE(review): splitting on '"url":"' is brittle and breaks if the page
    # markup changes — confirm against the current archive.org layout.
    infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
    response = session.get(infos_url)
    data = response.json()['data']
    title = data['brOptions']['bookTitle'].strip().replace(" ", "_")
    title = ''.join( c for c in title if c not in '<>:"/\\|?*' ) # Filter forbidden chars in directory names (Windows & Linux)
    title = title[:150] # Trim the title to avoid long file names
    metadata = data['metadata']
    links = []
    # brOptions['data'] is a list of page groups; flatten to one URI per page.
    for item in data['brOptions']['data']:
        for page in item:
            links.append(page['uri'])
    if len(links) > 1:
        print(f"[+] Found {len(links)} pages")
        return title, links, metadata
    else:
        print(f"[-] Error while getting image links")
        exit()
def login(email, password):
    """Log into archive.org and return an authenticated requests.Session.

    Exits the process (via display_error or exit) on token-fetch failure,
    bad credentials, or any other unsuccessful login response.
    """
    session = requests.Session()
    # First request fetches the login token required by the POST below.
    response = session.get("https://archive.org/services/account/login/")
    login_data = response.json()
    if not login_data['success']:
        display_error(response, "[-] Error while getting login token:")
    login_token = login_data["value"]["token"]
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {"username": email, "password": password, "t": login_token}
    response = session.post("https://archive.org/services/account/login/", headers=headers, data=json.dumps(data))
    try:
        response_json = response.json()
    except ValueError:
        # Was a bare `except:` — narrowed to the JSON-decode failure it guards
        # (json.JSONDecodeError subclasses ValueError). display_error exits,
        # so response_json is always bound past this point.
        display_error(response, "[-] Error while login:")
    if response_json["success"] == False:
        if response_json["value"] == "bad_login":
            print("[-] Invalid credentials!")
            exit()
        display_error(response, "[-] Error while login:")
    else:
        print("[+] Successful login")
        return session
def loan(session, book_id, verbose=True):
    """Borrow the book so its page images become downloadable.

    Returns the session on success (or when the book needs no borrow);
    exits the process via display_error when borrowing fails.
    """
    data = {
        "action": "grant_access",
        "identifier": book_id
    }
    # First call registers access; its response is intentionally unused.
    response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data)
    data['action'] = "browse_book"
    response = session.post("https://archive.org/services/loans/loan/", data=data)
    if response.status_code == 400 :
        # 400 either means "no borrow needed" (specific JSON error) or a real failure.
        try:
            if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
                print("This book doesn't need to be borrowed")
                return session
            else :
                display_error(response, "Something went wrong when trying to borrow the book.")
        except: # The response is not in JSON format
            display_error(response, "The book cannot be borrowed")
    data['action'] = "create_token"
    response = session.post("https://archive.org/services/loans/loan/", data=data)
    # A loan token in the body is the success signal.
    if "token" in response.text:
        if verbose:
            print("[+] Successful loan")
        return session
    else:
        display_error(response, "Something went wrong when trying to borrow the book, maybe you can't borrow this book.")
def return_loan(session, book_id):
    """Return a previously borrowed book so the loan slot is freed."""
    payload = {
        "action": "return_loan",
        "identifier": book_id
    }
    response = session.post("https://archive.org/services/loans/loan/", data=payload)
    success = response.status_code == 200 and response.json()["success"]
    if success:
        print("[+] Book returned")
    else:
        display_error(response, "Something went wrong when trying to return the book")
def image_name(pages, page, directory):
    """Zero-padded JPEG path for *page* so filenames sort lexicographically (e.g. 007.jpg)."""
    width = len(str(pages))
    return f"{directory}/{str(page).zfill(width)}.jpg"
def deobfuscate_image(image_data, link, obf_header):
    """Undo archive.org's partial image obfuscation (AES-CTR over the first 1024 bytes).

    @Author: https://github.com/justimm

    The X-Obfuscate header has the form "1|<base64 counter>", where the decoded
    counter is 16 bytes: an 8-byte fixed prefix followed by an 8-byte big-endian
    initial counter value. The AES key is the first 16 bytes of the SHA-1 digest
    of the image URL with its protocol/host portion replaced by '/'.
    """
    try:
        version, counter_b64 = obf_header.split('|')
    except Exception as e:
        raise ValueError("Invalid X-Obfuscate header format") from e
    if version != '1':
        raise ValueError("Unsupported obfuscation version: " + version)
    # Key material is the URL path: strip "https://host/" down to a leading '/'.
    key_material = re.sub(r"^https?:\/\/.*?\/", "/", link)
    key = hashlib.sha1(key_material.encode('utf-8')).digest()[:16]
    counter_raw = base64.b64decode(counter_b64)
    if len(counter_raw) != 16:
        raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_raw)}")
    # 64-bit counter: first 8 decoded bytes are the fixed prefix, the last 8
    # (big-endian) give the initial counter value.
    ctr = Counter.new(
        64,
        prefix=counter_raw[:8],
        initial_value=int.from_bytes(counter_raw[8:], byteorder='big'),
        little_endian=False,
    )
    cipher = AES.new(key, AES.MODE_CTR, counter=ctr)
    # Only the first 1024 bytes are encrypted; the remainder is plain image data.
    return cipher.decrypt(image_data[:1024]) + image_data[1024:]
def download_one_image(session, link, i, directory, book_id, pages):
    """Fetch page *i*, de-obfuscating it when the server marks it, and write it as a JPEG."""
    # Browser-like headers; archive.org serves images only with a proper Referer.
    headers = {
        "Referer": "https://archive.org/",
        "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
        "Sec-Fetch-Site": "same-site",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Dest": "image",
    }
    retry = True
    response = None
    while retry:
        try:
            response = session.get(link, headers=headers)
            if response.status_code == 403:
                # Loan expired mid-download: re-borrow, then force a retry.
                session = loan(session, book_id, verbose=False)
                raise Exception("Borrow again")
            elif response.status_code == 200:
                retry = False
        except:
            # NOTE(review): bare except + unconditional retry loops forever on
            # permanent failures (e.g. 404); other non-200/403 statuses also
            # retry, but without the 1s pause — confirm intended behavior.
            time.sleep(1) # Wait 1 second before retrying
    image = image_name(pages, i, directory)
    # X-Obfuscate marks responses whose first 1 KiB is AES-encrypted.
    obf_header = response.headers.get("X-Obfuscate")
    image_content = None
    if obf_header:
        try:
            image_content = deobfuscate_image(response.content, link, obf_header)
        except Exception as e:
            # Skip this page rather than writing corrupt bytes.
            print(f"[ERROR] Deobfuscation failed: {e}")
            return
    else:
        image_content = response.content
    with open(image, "wb") as f:
        f.write(image_content)
def download(session, n_threads, directory, links, scale, book_id):
    """Download all page images concurrently.

    Args:
        session: authenticated (and borrowed) requests.Session.
        n_threads: maximum worker threads.
        directory: destination directory for the JPEGs.
        links: per-page image URIs.
        scale: resolution scale (0 = highest).
        book_id: archive.org identifier, used to re-borrow on 403.

    Returns:
        Ordered list of the image file paths that were written.
    """
    print("Downloading pages...")
    links = [f"{link}&rotate=0&scale={scale}" for link in links]
    pages = len(links)
    tasks = []
    with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
        # BUG FIX: was `i = links.index(link)` inside the loop — O(n) per page
        # and returns the FIRST match, so duplicate links all mapped to the
        # same page number. enumerate() gives each page its true index in O(1).
        for i, link in enumerate(links):
            tasks.append(executor.submit(download_one_image, session=session, link=link, i=i, directory=directory, book_id=book_id, pages=pages))
        # Drain as tasks finish purely to drive the progress bar.
        for task in tqdm(futures.as_completed(tasks), total=len(tasks)):
            pass
    images = [image_name(pages, i, directory) for i in range(len(links))]
    return images
def make_pdf(pdf, title, directory):
    """Write PDF bytes to <directory>/<title>.pdf, appending (1), (2), ... on name clashes."""
    filename = title + ".pdf"
    # Handle the case where multiple books with the same name are downloaded
    suffix = 1
    while os.path.isfile(os.path.join(directory, filename)):
        filename = f"{title}({suffix}).pdf"
        suffix += 1
    with open(os.path.join(directory, filename), "wb") as f:
        f.write(pdf)
    print(f"[+] PDF saved as \"{filename}\"")
if __name__ == "__main__":
    # CLI entry point: parse arguments, log in, then borrow/download/return each book.
    my_parser = argparse.ArgumentParser()
    my_parser.add_argument('-e', '--email', help='Your archive.org email', type=str, required=True)
    my_parser.add_argument('-p', '--password', help='Your archive.org password', type=str, required=True)
    my_parser.add_argument('-u', '--url', help='Link to the book (https://archive.org/details/XXXX). You can use this argument several times to download multiple books', action='append', type=str)
    my_parser.add_argument('-d', '--dir', help='Output directory', type=str)
    my_parser.add_argument('-f', '--file', help='File where are stored the URLs of the books to download', type=str)
    my_parser.add_argument('-r', '--resolution', help='Image resolution (10 to 0, 0 is the highest), [default 3]', type=int, default=3)
    my_parser.add_argument('-t', '--threads', help="Maximum number of threads, [default 50]", type=int, default=50)
    my_parser.add_argument('-j', '--jpg', help="Output to individual JPG's rather than a PDF", action='store_true')
    my_parser.add_argument('-m', '--meta', help="Output the metadata of the book to a json file (-j option required)", action='store_true')
    # With no arguments at all, show usage instead of argparse's "required" error.
    if len(sys.argv) == 1:
        my_parser.print_help(sys.stderr)
        sys.exit(1)
    args = my_parser.parse_args()

    if args.url is None and args.file is None:
        my_parser.error("At least one of --url and --file required")

    email = args.email
    password = args.password
    scale = args.resolution  # image scale: 0 = highest resolution
    n_threads = args.threads
    d = args.dir
    if d == None:
        d = os.getcwd()
    elif not os.path.isdir(d):
        print(f"Output directory does not exist!")
        exit()

    # Collect book URLs either from repeated -u flags or one-per-line from the -f file.
    if args.url is not None:
        urls = args.url
    else:
        if os.path.exists(args.file):
            with open(args.file) as f:
                urls = f.read().strip().split("\n")
        else:
            print(f"{args.file} does not exist!")
            exit()

    # Check the urls format
    for url in urls:
        if not url.startswith("https://archive.org/details/"):
            print(f"{url} --> Invalid url. URL must starts with \"https://archive.org/details/\"")
            exit()

    print(f"{len(urls)} Book(s) to download")
    session = login(email, password)

    for url in urls:
        # The book identifier is the 4th non-empty path segment of the details URL.
        book_id = list(filter(None, url.split("/")))[3]
        print("="*40)
        print(f"Current book: https://archive.org/details/{book_id}")
        session = loan(session, book_id)
        title, links, metadata = get_book_infos(session, url)

        directory = os.path.join(d, title)
        # Handle the case where multiple books with the same name are downloaded
        i = 1
        _directory = directory
        while os.path.isdir(directory):
            directory = f"{_directory}({i})"
            i += 1
        os.makedirs(directory)

        if args.meta:
            print("Writing metadata.json...")
            with open(f"{directory}/metadata.json",'w') as f:
                json.dump(metadata,f)

        images = download(session, n_threads, directory, links, scale, book_id)

        if not args.jpg: # Create pdf with images and remove the images folder
            import img2pdf

            # prepare PDF metadata
            # sometimes archive metadata is missing
            pdfmeta = { }
            # ensure metadata are str
            for key in ["title", "creator", "associated-names"]:
                if key in metadata:
                    if isinstance(metadata[key], str):
                        pass
                    elif isinstance(metadata[key], list):
                        metadata[key] = "; ".join(metadata[key])
                    else:
                        raise Exception("unsupported metadata type")
            # title
            if 'title' in metadata:
                pdfmeta['title'] = metadata['title']
            # author
            if 'creator' in metadata and 'associated-names' in metadata:
                pdfmeta['author'] = metadata['creator'] + "; " + metadata['associated-names']
            elif 'creator' in metadata:
                pdfmeta['author'] = metadata['creator']
            elif 'associated-names' in metadata:
                pdfmeta['author'] = metadata['associated-names']
            # date
            if 'date' in metadata:
                try:
                    pdfmeta['creationdate'] = datetime.strptime(metadata['date'][0:4], '%Y')
                except:
                    pass
            # keywords
            pdfmeta['keywords'] = [f"https://archive.org/details/{book_id}"]

            pdf = img2pdf.convert(images, **pdfmeta)
            make_pdf(pdf, title, args.dir if args.dir != None else "")
            try:
                shutil.rmtree(directory)
            except OSError as e:
                print ("Error: %s - %s." % (e.filename, e.strerror))

        # Free the loan slot for this book.
        # NOTE(review): indentation is ambiguous in the diff — confirm whether this
        # runs once per book (loop level) or only when a PDF was built.
        return_loan(session, book_id)

11
tool/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
"""Tool helpers.
This package contains wrappers around external tools (e.g. yt-dlp) so cmdlets can share
common defaults (cookies, timeouts, format selectors) and users can override them via
`config.conf`.
"""
from .ytdlp import YtDlpTool, YtDlpDefaults
from .playwright import PlaywrightTool, PlaywrightDefaults
__all__ = ["YtDlpTool", "YtDlpDefaults", "PlaywrightTool", "PlaywrightDefaults"]

203
tool/playwright.py Normal file
View File

@@ -0,0 +1,203 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional
from SYS.logger import debug
try:
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
HAS_PLAYWRIGHT = True
_PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
except Exception as exc: # pragma: no cover
HAS_PLAYWRIGHT = False
_PLAYWRIGHT_IMPORT_ERROR = exc
PlaywrightTimeoutError = TimeoutError # type: ignore
sync_playwright = None # type: ignore
# Re-export for consumers (e.g. cmdlets catching navigation timeouts)
__all__ = ["HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults"]
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
cur: Any = config
for key in path:
if not isinstance(cur, dict):
return None
cur = cur.get(key)
return cur
@dataclass(slots=True)
class PlaywrightDefaults:
    """Default browser launch/context settings for Playwright sessions.

    NOTE(review): with slots=True, class-attribute access (e.g.
    ``PlaywrightDefaults.browser``) yields a member descriptor, not the field
    default — read defaults from an instance.
    """
    browser: str = "chromium"  # chromium|firefox|webkit
    headless: bool = True
    # Desktop Chrome UA so sites don't serve a bot/mobile variant.
    user_agent: str = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    viewport_width: int = 1280
    viewport_height: int = 1200
    navigation_timeout_ms: int = 90_000  # applied by PlaywrightTool.goto
    ignore_https_errors: bool = True
class PlaywrightTool:
"""Small wrapper to standardize Playwright defaults and lifecycle.
This is meant to keep cmdlets/providers from duplicating:
- sync_playwright start/stop
- browser launch/context creation
- user-agent/viewport defaults
Config overrides (top-level keys):
- playwright.browser="chromium"
- playwright.headless=true
- playwright.user_agent="..."
- playwright.viewport_width=1280
- playwright.viewport_height=1200
- playwright.navigation_timeout_ms=90000
- playwright.ignore_https_errors=true
"""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
self._config: Dict[str, Any] = dict(config or {})
self.defaults = self._load_defaults()
def _load_defaults(self) -> PlaywrightDefaults:
cfg = self._config
tool_block = _get_nested(cfg, "tool", "playwright")
if not isinstance(tool_block, dict):
tool_block = {}
pw_block = cfg.get("playwright") if isinstance(cfg.get("playwright"), dict) else {}
if not isinstance(pw_block, dict):
pw_block = {}
def _get(name: str, fallback: Any) -> Any:
val = tool_block.get(name)
if val is None:
val = pw_block.get(name)
if val is None:
val = cfg.get(f"playwright_{name}")
if val is None:
val = _get_nested(cfg, "playwright", name)
return fallback if val is None else val
browser = str(_get("browser", PlaywrightDefaults.browser)).strip().lower() or "chromium"
if browser not in {"chromium", "firefox", "webkit"}:
browser = "chromium"
headless_raw = _get("headless", PlaywrightDefaults.headless)
headless = bool(headless_raw)
ua = str(_get("user_agent", PlaywrightDefaults.user_agent))
def _int(name: str, fallback: int) -> int:
raw = _get(name, fallback)
try:
return int(raw)
except Exception:
return fallback
vw = _int("viewport_width", PlaywrightDefaults.viewport_width)
vh = _int("viewport_height", PlaywrightDefaults.viewport_height)
nav_timeout = _int("navigation_timeout_ms", PlaywrightDefaults.navigation_timeout_ms)
ignore_https = bool(_get("ignore_https_errors", PlaywrightDefaults.ignore_https_errors))
return PlaywrightDefaults(
browser=browser,
headless=headless,
user_agent=ua,
viewport_width=vw,
viewport_height=vh,
navigation_timeout_ms=nav_timeout,
ignore_https_errors=ignore_https,
)
def require(self) -> None:
if HAS_PLAYWRIGHT and sync_playwright is not None:
return
detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
raise RuntimeError(
"playwright is required; install with: pip install playwright; then: playwright install\n"
f"detail: {detail}"
)
def open_page(
self,
*,
headless: Optional[bool] = None,
user_agent: Optional[str] = None,
viewport_width: Optional[int] = None,
viewport_height: Optional[int] = None,
ignore_https_errors: Optional[bool] = None,
) -> Iterator[Any]:
"""Context manager yielding a Playwright page with sane defaults."""
self.require()
h = self.defaults.headless if headless is None else bool(headless)
ua = self.defaults.user_agent if user_agent is None else str(user_agent)
vw = self.defaults.viewport_width if viewport_width is None else int(viewport_width)
vh = self.defaults.viewport_height if viewport_height is None else int(viewport_height)
ihe = self.defaults.ignore_https_errors if ignore_https_errors is None else bool(ignore_https_errors)
pw = None
browser = None
context = None
try:
assert sync_playwright is not None
pw = sync_playwright().start()
browser_type = getattr(pw, self.defaults.browser, None)
if browser_type is None:
browser_type = pw.chromium
browser = browser_type.launch(
headless=h,
args=["--disable-blink-features=AutomationControlled"],
)
context = browser.new_context(
user_agent=ua,
viewport={"width": vw, "height": vh},
ignore_https_errors=ihe,
)
page = context.new_page()
yield page
finally:
try:
if context is not None:
context.close()
except Exception:
pass
try:
if browser is not None:
browser.close()
except Exception:
pass
try:
if pw is not None:
pw.stop()
except Exception:
pass
def goto(self, page: Any, url: str) -> None:
"""Navigate with configured timeout."""
try:
page.goto(url, timeout=int(self.defaults.navigation_timeout_ms), wait_until="domcontentloaded")
except Exception:
raise
def debug_dump(self) -> None:
try:
debug(
f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
)
except Exception:
pass

195
tool/ytdlp.py Normal file
View File

@@ -0,0 +1,195 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from SYS.logger import debug
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
cur: Any = config
for key in path:
if not isinstance(cur, dict):
return None
cur = cur.get(key)
return cur
def _parse_csv_list(value: Any) -> Optional[List[str]]:
if value is None:
return None
if isinstance(value, list):
out: List[str] = []
for item in value:
s = str(item).strip()
if s:
out.append(s)
return out or None
s = str(value).strip()
if not s:
return None
# allow either JSON-ish list strings or simple comma-separated values
if s.startswith("[") and s.endswith("]"):
s = s[1:-1]
parts = [p.strip() for p in s.split(",")]
parts = [p for p in parts if p]
return parts or None
@dataclass(slots=True)
class YtDlpDefaults:
    """User-tunable defaults for yt-dlp behavior.

    Recommended config.conf keys (top-level dotted keys):
    - ytdlp.video_format="bestvideo+bestaudio/best"
    - ytdlp.audio_format="251/140/bestaudio"
    - ytdlp.format_sort="res:2160,res:1440,res:1080,res:720,res"

    Cookies:
    - cookies="C:\\path\\cookies.txt" (already supported by config.resolve_cookies_path)

    NOTE(review): with slots=True, class-attribute access (e.g.
    ``YtDlpDefaults.video_format``) yields a member descriptor, not the field
    default — read defaults from an instance.
    """
    video_format: str = "bestvideo+bestaudio/best"  # yt-dlp --format for video mode
    audio_format: str = "251/140/bestaudio"  # yt-dlp --format for audio mode
    format_sort: Optional[List[str]] = None  # entries passed one-per -S flag
class YtDlpTool:
"""Centralizes yt-dlp defaults and translation helpers.
This is intentionally small and dependency-light so cmdlets can use it without
forcing a full refactor.
"""
def __init__(self, config: Optional[Dict[str, Any]] = None, *, script_dir: Optional[Path] = None) -> None:
self._config: Dict[str, Any] = dict(config or {})
# `resolve_cookies_path` expects the app root so it can fall back to ./cookies.txt.
# This file lives under ./tool/, so default to the parent directory.
self._script_dir = script_dir or Path(__file__).resolve().parent.parent
self.defaults = self._load_defaults()
self._cookiefile: Optional[Path] = self._init_cookiefile()
def _init_cookiefile(self) -> Optional[Path]:
"""Resolve cookies once at tool init (yt-dlp is the primary consumer)."""
try:
from config import resolve_cookies_path
resolved = resolve_cookies_path(self._config, script_dir=self._script_dir)
if resolved is not None and resolved.is_file():
return resolved
except Exception:
pass
return None
def _load_defaults(self) -> YtDlpDefaults:
cfg = self._config
tool_block = _get_nested(cfg, "tool", "ytdlp")
if not isinstance(tool_block, dict):
tool_block = {}
ytdlp_block = cfg.get("ytdlp") if isinstance(cfg.get("ytdlp"), dict) else {}
if not isinstance(ytdlp_block, dict):
ytdlp_block = {}
# Accept both nested and flat styles.
video_format = (
tool_block.get("video_format")
or tool_block.get("format")
or ytdlp_block.get("video_format")
or ytdlp_block.get("video")
or ytdlp_block.get("format_video")
or cfg.get("ytdlp_video_format")
)
audio_format = (
tool_block.get("audio_format")
or ytdlp_block.get("audio_format")
or ytdlp_block.get("audio")
or ytdlp_block.get("format_audio")
or cfg.get("ytdlp_audio_format")
)
# Also accept dotted keys written as nested dicts: ytdlp.format.video, ytdlp.format.audio
nested_video = _get_nested(cfg, "ytdlp", "format", "video")
nested_audio = _get_nested(cfg, "ytdlp", "format", "audio")
fmt_sort_val = (
tool_block.get("format_sort")
or ytdlp_block.get("format_sort")
or ytdlp_block.get("formatSort")
or cfg.get("ytdlp_format_sort")
or _get_nested(cfg, "ytdlp", "format", "sort")
)
fmt_sort = _parse_csv_list(fmt_sort_val)
defaults = YtDlpDefaults(
video_format=str(nested_video or video_format or YtDlpDefaults.video_format),
audio_format=str(nested_audio or audio_format or YtDlpDefaults.audio_format),
format_sort=fmt_sort,
)
return defaults
def resolve_cookiefile(self) -> Optional[Path]:
return self._cookiefile
def default_format(self, mode: str) -> str:
m = str(mode or "").lower().strip()
if m == "audio":
return self.defaults.audio_format
return self.defaults.video_format
def build_yt_dlp_cli_args(
self,
*,
url: str,
output_dir: Optional[Path] = None,
ytdl_format: Optional[str] = None,
playlist_items: Optional[str] = None,
no_playlist: bool = False,
quiet: bool = True,
extra_args: Optional[Sequence[str]] = None,
) -> List[str]:
"""Build a yt-dlp command line (argv list).
This is primarily for debug output or subprocess execution.
"""
argv: List[str] = ["yt-dlp"]
if quiet:
argv.extend(["--quiet", "--no-warnings"])
argv.append("--no-progress")
cookiefile = self.resolve_cookiefile()
if cookiefile is not None:
argv.extend(["--cookies", str(cookiefile)])
if no_playlist:
argv.append("--no-playlist")
if playlist_items:
argv.extend(["--playlist-items", str(playlist_items)])
fmt = (ytdl_format or "").strip()
if fmt:
# Use long form to avoid confusion with app-level flags.
argv.extend(["--format", fmt])
if self.defaults.format_sort:
for sort_key in self.defaults.format_sort:
argv.extend(["-S", sort_key])
if output_dir is not None:
outtmpl = str((output_dir / "%(title)s.%(ext)s").resolve())
argv.extend(["-o", outtmpl])
if extra_args:
argv.extend([str(a) for a in extra_args if str(a).strip()])
argv.append(str(url))
return argv
def debug_print_cli(self, argv: Sequence[str]) -> None:
try:
debug("yt-dlp argv: " + " ".join(str(a) for a in argv))
except Exception:
pass