dfdfsdd
This commit is contained in:
@@ -12,6 +12,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from SYS.logger import log, debug
|
from SYS.logger import log, debug
|
||||||
|
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
|
||||||
import tempfile
|
import tempfile
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -1103,9 +1104,7 @@ SUPPORTED_FILETYPES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Flatten to get all supported extensions
|
# Flatten to get all supported extensions
|
||||||
ALL_SUPPORTED_EXTENSIONS = set()
|
ALL_SUPPORTED_EXTENSIONS = set(GLOBAL_SUPPORTED_EXTENSIONS)
|
||||||
for category_extensions in SUPPORTED_FILETYPES.values():
|
|
||||||
ALL_SUPPORTED_EXTENSIONS.update(category_extensions.keys())
|
|
||||||
|
|
||||||
|
|
||||||
# Global Hydrus client cache to reuse session keys
|
# Global Hydrus client cache to reuse session keys
|
||||||
|
|||||||
@@ -1,584 +0,0 @@
|
|||||||
"""Archive.org API client for borrowing and downloading books.
|
|
||||||
|
|
||||||
This module provides low-level functions for interacting with Archive.org:
|
|
||||||
- Authentication (login, credential management)
|
|
||||||
- Borrowing (loan, return_loan)
|
|
||||||
- Book metadata extraction (get_book_infos, get_book_metadata)
|
|
||||||
- Image downloading and deobfuscation
|
|
||||||
- PDF creation with metadata
|
|
||||||
|
|
||||||
Used by Provider/openlibrary.py for the borrowing workflow.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from concurrent import futures
|
|
||||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from SYS.logger import log, debug
|
|
||||||
|
|
||||||
try:
|
|
||||||
from Crypto.Cipher import AES # type: ignore
|
|
||||||
from Crypto.Util import Counter # type: ignore
|
|
||||||
except ImportError:
|
|
||||||
AES = None # type: ignore
|
|
||||||
Counter = None # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
from tqdm import tqdm # type: ignore
|
|
||||||
except ImportError:
|
|
||||||
tqdm = None # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
|
||||||
"""Get OpenLibrary/Archive.org email and password from config.
|
|
||||||
|
|
||||||
Supports both formats:
|
|
||||||
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
|
||||||
- Old: {"Archive": {"email": "...", "password": "..."}}
|
|
||||||
{"archive_org_email": "...", "archive_org_password": "..."}
|
|
||||||
|
|
||||||
Returns: (email, password) tuple, each can be None
|
|
||||||
"""
|
|
||||||
if not isinstance(config, dict):
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
# Try new format first
|
|
||||||
provider_config = config.get("provider", {})
|
|
||||||
if isinstance(provider_config, dict):
|
|
||||||
openlibrary_config = provider_config.get("openlibrary", {})
|
|
||||||
if isinstance(openlibrary_config, dict):
|
|
||||||
email = openlibrary_config.get("email")
|
|
||||||
password = openlibrary_config.get("password")
|
|
||||||
if email or password:
|
|
||||||
return email, password
|
|
||||||
|
|
||||||
# Try old nested format
|
|
||||||
archive_config = config.get("Archive")
|
|
||||||
if isinstance(archive_config, dict):
|
|
||||||
email = archive_config.get("email")
|
|
||||||
password = archive_config.get("password")
|
|
||||||
if email or password:
|
|
||||||
return email, password
|
|
||||||
|
|
||||||
# Fall back to old flat format
|
|
||||||
email = config.get("archive_org_email")
|
|
||||||
password = config.get("archive_org_password")
|
|
||||||
return email, password
|
|
||||||
|
|
||||||
|
|
||||||
class BookNotAvailableError(Exception):
|
|
||||||
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def display_error(response: requests.Response, message: str) -> None:
|
|
||||||
"""Display error and exit."""
|
|
||||||
log(message, file=sys.stderr)
|
|
||||||
log(response.text, file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def login(email: str, password: str) -> requests.Session:
|
|
||||||
"""Login to archive.org.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
email: Archive.org email
|
|
||||||
password: Archive.org password
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Authenticated requests.Session
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
SystemExit on login failure
|
|
||||||
"""
|
|
||||||
session = requests.Session()
|
|
||||||
session.get("https://archive.org/account/login", timeout=30)
|
|
||||||
|
|
||||||
data = {"username": email, "password": password}
|
|
||||||
response = session.post("https://archive.org/account/login", data=data, timeout=30)
|
|
||||||
|
|
||||||
if "bad_login" in response.text:
|
|
||||||
log("Invalid credentials!", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
if "Successful login" in response.text:
|
|
||||||
debug("Successful login")
|
|
||||||
return session
|
|
||||||
display_error(response, "[-] Error while login:")
|
|
||||||
sys.exit(1) # Unreachable but satisfies type checker
|
|
||||||
|
|
||||||
|
|
||||||
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
|
|
||||||
"""Borrow a book from archive.org (14-day loan).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: Authenticated requests.Session from login()
|
|
||||||
book_id: Archive.org book identifier (e.g., 'ia_book_id')
|
|
||||||
verbose: Whether to log messages
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Session with active loan
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
SystemExit on loan failure
|
|
||||||
"""
|
|
||||||
data = {"action": "grant_access", "identifier": book_id}
|
|
||||||
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
|
|
||||||
data["action"] = "browse_book"
|
|
||||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
|
||||||
|
|
||||||
if response.status_code == 400:
|
|
||||||
try:
|
|
||||||
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
|
|
||||||
debug("Book is not available for borrowing (waitlisted or in use)")
|
|
||||||
raise BookNotAvailableError("Book is waitlisted or in use")
|
|
||||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
|
||||||
except BookNotAvailableError:
|
|
||||||
raise
|
|
||||||
except:
|
|
||||||
display_error(response, "The book cannot be borrowed")
|
|
||||||
|
|
||||||
data["action"] = "create_token"
|
|
||||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
|
||||||
|
|
||||||
if "token" in response.text:
|
|
||||||
if verbose:
|
|
||||||
debug("Successful loan")
|
|
||||||
return session
|
|
||||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
|
||||||
sys.exit(1) # Unreachable but satisfies type checker
|
|
||||||
|
|
||||||
|
|
||||||
def return_loan(session: requests.Session, book_id: str) -> None:
|
|
||||||
"""Return a borrowed book.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: Authenticated requests.Session with active loan
|
|
||||||
book_id: Archive.org book identifier
|
|
||||||
"""
|
|
||||||
data = {"action": "return_loan", "identifier": book_id}
|
|
||||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
|
||||||
if response.status_code == 200 and response.json()["success"]:
|
|
||||||
debug("Book returned")
|
|
||||||
else:
|
|
||||||
display_error(response, "Something went wrong when trying to return the book")
|
|
||||||
|
|
||||||
|
|
||||||
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
|
||||||
"""Extract book information and page links from archive.org viewer.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: Authenticated requests.Session
|
|
||||||
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (title, page_links, metadata)
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
RuntimeError: If page data cannot be extracted
|
|
||||||
"""
|
|
||||||
r = session.get(url, timeout=30).text
|
|
||||||
|
|
||||||
# Try to extract the infos URL from the response
|
|
||||||
try:
|
|
||||||
# Look for the "url" field in the response using regex
|
|
||||||
# Matches "url":"//archive.org/..."
|
|
||||||
import re
|
|
||||||
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
|
|
||||||
if not match:
|
|
||||||
raise ValueError("No 'url' field found in response")
|
|
||||||
|
|
||||||
url_path = match.group(1)
|
|
||||||
if url_path.startswith("//"):
|
|
||||||
infos_url = "https:" + url_path
|
|
||||||
else:
|
|
||||||
infos_url = url_path
|
|
||||||
|
|
||||||
infos_url = infos_url.replace("\\u0026", "&")
|
|
||||||
except (IndexError, ValueError, AttributeError) as e:
|
|
||||||
# If URL extraction fails, raise with better error message
|
|
||||||
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
|
|
||||||
|
|
||||||
response = session.get(infos_url, timeout=30)
|
|
||||||
data = response.json()["data"]
|
|
||||||
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
|
|
||||||
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
|
|
||||||
title = title[:150] # Trim to avoid long file names
|
|
||||||
metadata = data["metadata"]
|
|
||||||
links = []
|
|
||||||
|
|
||||||
# Safely extract page links from brOptions data
|
|
||||||
try:
|
|
||||||
br_data = data.get("brOptions", {}).get("data", [])
|
|
||||||
for item in br_data:
|
|
||||||
if isinstance(item, list):
|
|
||||||
for page in item:
|
|
||||||
if isinstance(page, dict) and "uri" in page:
|
|
||||||
links.append(page["uri"])
|
|
||||||
elif isinstance(item, dict) and "uri" in item:
|
|
||||||
links.append(item["uri"])
|
|
||||||
except (KeyError, IndexError, TypeError) as e:
|
|
||||||
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
|
|
||||||
# Continue with whatever links we found
|
|
||||||
|
|
||||||
if len(links) > 1:
|
|
||||||
debug(f"Found {len(links)} pages")
|
|
||||||
return title, links, metadata
|
|
||||||
elif len(links) == 1:
|
|
||||||
debug(f"Found {len(links)} page")
|
|
||||||
return title, links, metadata
|
|
||||||
else:
|
|
||||||
log("Error while getting image links - no pages found", file=sys.stderr)
|
|
||||||
raise RuntimeError("No pages found in book data")
|
|
||||||
|
|
||||||
|
|
||||||
def image_name(pages: int, page: int, directory: str) -> str:
|
|
||||||
"""Generate image filename for page.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
pages: Total number of pages
|
|
||||||
page: Current page number (0-indexed)
|
|
||||||
directory: Directory to save to
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Full path to image file
|
|
||||||
"""
|
|
||||||
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
|
||||||
|
|
||||||
|
|
||||||
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
|
|
||||||
"""Decrypt obfuscated image data using AES-CTR.
|
|
||||||
|
|
||||||
This handles Archive.org's image obfuscation for borrowed books.
|
|
||||||
Based on: https://github.com/justimm
|
|
||||||
|
|
||||||
Args:
|
|
||||||
image_data: Encrypted image bytes
|
|
||||||
link: Image URL (used to derive AES key)
|
|
||||||
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Decrypted image bytes
|
|
||||||
"""
|
|
||||||
if not AES or not Counter:
|
|
||||||
raise RuntimeError("Crypto library not available")
|
|
||||||
|
|
||||||
try:
|
|
||||||
version, counter_b64 = obf_header.split("|")
|
|
||||||
except Exception as e:
|
|
||||||
raise ValueError("Invalid X-Obfuscate header format") from e
|
|
||||||
|
|
||||||
if version != "1":
|
|
||||||
raise ValueError("Unsupported obfuscation version: " + version)
|
|
||||||
|
|
||||||
# Derive AES key from URL
|
|
||||||
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
|
||||||
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
|
|
||||||
key = sha1_digest[:16]
|
|
||||||
|
|
||||||
# Decode counter
|
|
||||||
counter_bytes = base64.b64decode(counter_b64)
|
|
||||||
if len(counter_bytes) != 16:
|
|
||||||
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
|
|
||||||
|
|
||||||
prefix = counter_bytes[:8]
|
|
||||||
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
|
||||||
|
|
||||||
# Create AES-CTR cipher
|
|
||||||
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
|
|
||||||
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
|
||||||
|
|
||||||
decrypted_part = cipher.decrypt(image_data[:1024])
|
|
||||||
new_data = decrypted_part + image_data[1024:]
|
|
||||||
return new_data
|
|
||||||
|
|
||||||
|
|
||||||
def download_one_image(
|
|
||||||
session: requests.Session,
|
|
||||||
link: str,
|
|
||||||
i: int,
|
|
||||||
directory: str,
|
|
||||||
book_id: str,
|
|
||||||
pages: int,
|
|
||||||
) -> None:
|
|
||||||
"""Download a single book page image.
|
|
||||||
|
|
||||||
Handles obfuscated images and re-borrowing on 403 errors.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: Authenticated requests.Session
|
|
||||||
link: Direct image URL
|
|
||||||
i: Page index (0-based)
|
|
||||||
directory: Directory to save to
|
|
||||||
book_id: Archive.org book ID (for re-borrowing on 403)
|
|
||||||
pages: Total number of pages
|
|
||||||
"""
|
|
||||||
headers = {
|
|
||||||
"Referer": "https://archive.org/",
|
|
||||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
|
||||||
"Sec-Fetch-Site": "same-site",
|
|
||||||
"Sec-Fetch-Mode": "no-cors",
|
|
||||||
"Sec-Fetch-Dest": "image",
|
|
||||||
}
|
|
||||||
retry = True
|
|
||||||
response = None
|
|
||||||
while retry:
|
|
||||||
try:
|
|
||||||
response = session.get(link, headers=headers, timeout=30)
|
|
||||||
if response.status_code == 403:
|
|
||||||
session = loan(session, book_id, verbose=False)
|
|
||||||
raise Exception("Borrow again")
|
|
||||||
if response.status_code == 200:
|
|
||||||
retry = False
|
|
||||||
except:
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
image = image_name(pages, i, directory)
|
|
||||||
|
|
||||||
if response is None:
|
|
||||||
log(f"Failed to download page {i}", file=sys.stderr)
|
|
||||||
return
|
|
||||||
|
|
||||||
obf_header = response.headers.get("X-Obfuscate")
|
|
||||||
image_content = None
|
|
||||||
if obf_header:
|
|
||||||
try:
|
|
||||||
image_content = deobfuscate_image(response.content, link, obf_header)
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Deobfuscation failed: {e}", file=sys.stderr)
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
image_content = response.content
|
|
||||||
|
|
||||||
with open(image, "wb") as f:
|
|
||||||
f.write(image_content)
|
|
||||||
|
|
||||||
|
|
||||||
def download(
|
|
||||||
session: requests.Session,
|
|
||||||
n_threads: int,
|
|
||||||
directory: str,
|
|
||||||
links: List[str],
|
|
||||||
scale: int,
|
|
||||||
book_id: str,
|
|
||||||
) -> List[str]:
|
|
||||||
"""Download all book pages as images.
|
|
||||||
|
|
||||||
Uses thread pool for parallel downloads.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
session: Authenticated requests.Session
|
|
||||||
n_threads: Number of download threads
|
|
||||||
directory: Directory to save images to
|
|
||||||
links: List of image url
|
|
||||||
scale: Image resolution (0=highest, 10=lowest)
|
|
||||||
book_id: Archive.org book ID (for re-borrowing)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of downloaded image file paths
|
|
||||||
"""
|
|
||||||
debug("Downloading pages...")
|
|
||||||
links = [f"{link}&rotate=0&scale={scale}" for link in links]
|
|
||||||
pages = len(links)
|
|
||||||
|
|
||||||
tasks = []
|
|
||||||
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
|
||||||
for link in links:
|
|
||||||
i = links.index(link)
|
|
||||||
tasks.append(
|
|
||||||
executor.submit(
|
|
||||||
download_one_image,
|
|
||||||
session=session,
|
|
||||||
link=link,
|
|
||||||
i=i,
|
|
||||||
directory=directory,
|
|
||||||
book_id=book_id,
|
|
||||||
pages=pages,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if tqdm:
|
|
||||||
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
for _ in futures.as_completed(tasks):
|
|
||||||
pass
|
|
||||||
|
|
||||||
images = [image_name(pages, i, directory) for i in range(len(links))]
|
|
||||||
return images
|
|
||||||
|
|
||||||
|
|
||||||
def check_direct_download(book_id: str) -> Tuple[bool, str]:
|
|
||||||
"""Check if a book can be downloaded directly without borrowing.
|
|
||||||
|
|
||||||
Searches Archive.org metadata for downloadable PDF files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
book_id: Archive.org book identifier
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (can_download: bool, pdf_url: str)
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# First, try to get the metadata to find the actual PDF filename
|
|
||||||
metadata_url = f"https://archive.org/metadata/{book_id}"
|
|
||||||
response = requests.get(metadata_url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
metadata = response.json()
|
|
||||||
|
|
||||||
# Find PDF file in files list
|
|
||||||
if "files" in metadata:
|
|
||||||
for file_info in metadata["files"]:
|
|
||||||
filename = file_info.get("name", "")
|
|
||||||
if filename.endswith(".pdf") and file_info.get("source") == "original":
|
|
||||||
# Found the original PDF
|
|
||||||
pdf_filename = filename
|
|
||||||
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
|
|
||||||
|
|
||||||
# Verify it's accessible
|
|
||||||
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
|
|
||||||
if check_response.status_code == 200:
|
|
||||||
return True, pdf_url
|
|
||||||
|
|
||||||
return False, ""
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Error checking direct download: {e}", file=sys.stderr)
|
|
||||||
return False, ""
|
|
||||||
|
|
||||||
|
|
||||||
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
|
|
||||||
"""Fetch book data from OpenLibrary using ISBN.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
isbn: ISBN-10 or ISBN-13 to search for
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with book metadata from OpenLibrary
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Try ISBN API first
|
|
||||||
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
|
|
||||||
response = requests.get(api_url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
|
|
||||||
if data:
|
|
||||||
# Get first result
|
|
||||||
key = list(data.keys())[0]
|
|
||||||
return data[key]
|
|
||||||
return {}
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
|
|
||||||
"""Extract ISBN from archive.org metadata.
|
|
||||||
|
|
||||||
Looks for ISBN in various metadata fields.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
metadata: Archive.org metadata dictionary
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ISBN string (clean, no hyphens) or empty string if not found
|
|
||||||
"""
|
|
||||||
# Try various common metadata fields
|
|
||||||
isbn_fields = [
|
|
||||||
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
|
|
||||||
"isbn-10", "isbn-13", "identifer_isbn"
|
|
||||||
]
|
|
||||||
|
|
||||||
for field in isbn_fields:
|
|
||||||
if field in metadata:
|
|
||||||
isbn_val = metadata[field]
|
|
||||||
if isinstance(isbn_val, list):
|
|
||||||
isbn_val = isbn_val[0] if isbn_val else None
|
|
||||||
if isbn_val and isinstance(isbn_val, str):
|
|
||||||
# Clean ISBN (remove hyphens, spaces)
|
|
||||||
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
|
|
||||||
if len(isbn_clean) in [10, 13]:
|
|
||||||
return isbn_clean
|
|
||||||
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_url(url: str) -> str:
|
|
||||||
"""Convert openlibrary.org URL to archive.org URL.
|
|
||||||
|
|
||||||
Looks up the actual Archive.org ID from OpenLibrary API.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url: Book URL (archive.org or openlibrary.org format)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Normalized archive.org URL
|
|
||||||
"""
|
|
||||||
url = url.strip()
|
|
||||||
|
|
||||||
# Already archive.org format
|
|
||||||
if url.startswith("https://archive.org/details/"):
|
|
||||||
return url
|
|
||||||
|
|
||||||
# Convert openlibrary.org format by querying the OpenLibrary API
|
|
||||||
if "openlibrary.org/books/" in url:
|
|
||||||
try:
|
|
||||||
# Extract the book ID (e.g., OL6796852M)
|
|
||||||
parts = url.split("/books/")
|
|
||||||
if len(parts) > 1:
|
|
||||||
book_id = parts[1].split("/")[0]
|
|
||||||
|
|
||||||
# Query OpenLibrary API to get the book metadata
|
|
||||||
api_url = f"https://openlibrary.org/books/{book_id}.json"
|
|
||||||
response = requests.get(api_url, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
|
|
||||||
# Look for identifiers including internet_archive or ocaid
|
|
||||||
# First try ocaid (Open Content Alliance ID) - this is most common
|
|
||||||
if "ocaid" in data:
|
|
||||||
ocaid = data["ocaid"]
|
|
||||||
return f"https://archive.org/details/{ocaid}"
|
|
||||||
|
|
||||||
# Check for identifiers object
|
|
||||||
if "identifiers" in data:
|
|
||||||
identifiers = data["identifiers"]
|
|
||||||
|
|
||||||
# Look for internet_archive ID
|
|
||||||
if "internet_archive" in identifiers:
|
|
||||||
ia_ids = identifiers["internet_archive"]
|
|
||||||
if isinstance(ia_ids, list) and ia_ids:
|
|
||||||
ia_id = ia_ids[0]
|
|
||||||
else:
|
|
||||||
ia_id = ia_ids
|
|
||||||
return f"https://archive.org/details/{ia_id}"
|
|
||||||
|
|
||||||
# If no IA identifier found, use the book ID as fallback
|
|
||||||
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
|
|
||||||
return f"https://archive.org/details/{book_id}"
|
|
||||||
|
|
||||||
except requests.RequestException as e:
|
|
||||||
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
|
|
||||||
# Fallback to using the book ID directly
|
|
||||||
parts = url.split("/books/")
|
|
||||||
if len(parts) > 1:
|
|
||||||
book_id = parts[1].split("/")[0]
|
|
||||||
return f"https://archive.org/details/{book_id}"
|
|
||||||
except (KeyError, IndexError) as e:
|
|
||||||
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
|
|
||||||
# Fallback to using the book ID directly
|
|
||||||
parts = url.split("/books/")
|
|
||||||
if len(parts) > 1:
|
|
||||||
book_id = parts[1].split("/")[0]
|
|
||||||
return f"https://archive.org/details/{book_id}"
|
|
||||||
|
|
||||||
# Return original if can't parse
|
|
||||||
return url
|
|
||||||
128
API/folder.py
128
API/folder.py
@@ -407,38 +407,53 @@ class API_folder_store:
|
|||||||
logger.error(f"Error clearing worker log for {worker_id}: {exc}", exc_info=True)
|
logger.error(f"Error clearing worker log for {worker_id}: {exc}", exc_info=True)
|
||||||
|
|
||||||
def _migrate_metadata_schema(self, cursor) -> None:
|
def _migrate_metadata_schema(self, cursor) -> None:
|
||||||
"""Import legacy metadata from old schema if present. Existing hash-based schema is ready to use."""
|
"""Ensure metadata schema is up-to-date.
|
||||||
|
|
||||||
|
- If a legacy schema is detected, attempt to import/upgrade (best-effort).
|
||||||
|
- If the hash-based schema exists, add any missing columns expected by current code.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Check if this is a fresh new database (hash-based schema)
|
# Check if this is a fresh new database (hash-based schema)
|
||||||
cursor.execute('PRAGMA table_info(metadata)')
|
cursor.execute('PRAGMA table_info(metadata)')
|
||||||
existing_columns = {row[1] for row in cursor.fetchall()}
|
existing_columns = {row[1] for row in cursor.fetchall()}
|
||||||
|
|
||||||
# If hash column exists, we're already on the new schema
|
# Legacy migration: If old schema exists, try to import data.
|
||||||
if 'hash' in existing_columns:
|
|
||||||
logger.info("Database is already using hash-based schema - no migration needed")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Legacy migration: If old schema exists, try to import data
|
|
||||||
# Old schema would have had: id (INTEGER PRIMARY KEY), file_hash (TEXT), etc.
|
# Old schema would have had: id (INTEGER PRIMARY KEY), file_hash (TEXT), etc.
|
||||||
|
if 'hash' not in existing_columns:
|
||||||
if 'id' in existing_columns and 'file_hash' in existing_columns:
|
if 'id' in existing_columns and 'file_hash' in existing_columns:
|
||||||
logger.info("Detected legacy metadata schema - importing to new hash-based schema")
|
logger.info("Detected legacy metadata schema - importing to new hash-based schema")
|
||||||
# This would be complex legacy migration - for now just note it
|
# This would be complex legacy migration - for now just note it.
|
||||||
logger.info("Legacy metadata table detected but import not yet implemented")
|
logger.info("Legacy metadata table detected but import not yet implemented")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Add any missing columns to the new schema
|
# Unknown/unsupported schema; nothing we can safely do here.
|
||||||
for col_name, col_def in [('size', 'INTEGER'), ('ext', 'TEXT'),
|
return
|
||||||
('type', 'TEXT'),
|
|
||||||
('time_imported', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'),
|
# Hash-based schema exists: add any missing columns expected by current code.
|
||||||
('time_modified', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP')]:
|
# These are safe ALTER TABLE additions for older DBs.
|
||||||
|
column_specs = {
|
||||||
|
'size': 'INTEGER',
|
||||||
|
'ext': 'TEXT',
|
||||||
|
'type': 'TEXT',
|
||||||
|
'url': 'TEXT',
|
||||||
|
'relationships': 'TEXT',
|
||||||
|
'duration': 'REAL',
|
||||||
|
'time_imported': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
|
||||||
|
'time_modified': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
|
||||||
|
'created_at': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
|
||||||
|
'updated_at': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
|
||||||
|
}
|
||||||
|
|
||||||
|
for col_name, col_def in column_specs.items():
|
||||||
if col_name not in existing_columns:
|
if col_name not in existing_columns:
|
||||||
try:
|
try:
|
||||||
cursor.execute(f"ALTER TABLE metadata ADD COLUMN {col_name} {col_def}")
|
cursor.execute(f"ALTER TABLE metadata ADD COLUMN {col_name} {col_def}")
|
||||||
|
existing_columns.add(col_name)
|
||||||
logger.info(f"Added '{col_name}' column to metadata table")
|
logger.info(f"Added '{col_name}' column to metadata table")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Column '{col_name}' may already exist: {e}")
|
logger.debug(f"Column '{col_name}' may already exist: {e}")
|
||||||
|
|
||||||
# Populate type column from ext if not already populated
|
# Populate type column from ext if not already populated.
|
||||||
if 'type' in existing_columns and 'ext' in existing_columns:
|
if 'type' in existing_columns and 'ext' in existing_columns:
|
||||||
try:
|
try:
|
||||||
from SYS.utils_constant import get_type_from_ext
|
from SYS.utils_constant import get_type_from_ext
|
||||||
@@ -929,6 +944,13 @@ class API_folder_store:
|
|||||||
if not fields:
|
if not fields:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Ensure a metadata row exists so updates don't silently no-op.
|
||||||
|
# This can happen for older DBs or entries created without explicit metadata.
|
||||||
|
cursor.execute(
|
||||||
|
"INSERT OR IGNORE INTO metadata (hash) VALUES (?)",
|
||||||
|
(file_hash,),
|
||||||
|
)
|
||||||
|
|
||||||
values.append(file_hash)
|
values.append(file_hash)
|
||||||
|
|
||||||
sql = f"UPDATE metadata SET {', '.join(fields)}, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?"
|
sql = f"UPDATE metadata SET {', '.join(fields)}, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?"
|
||||||
@@ -1681,6 +1703,84 @@ class DatabaseAPI:
|
|||||||
)
|
)
|
||||||
return {row[0] for row in cursor.fetchall()}
|
return {row[0] for row in cursor.fetchall()}
|
||||||
|
|
||||||
|
def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
|
||||||
|
"""Get hashes of files that have any non-empty URL metadata."""
|
||||||
|
cursor = self.get_cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT DISTINCT f.hash
|
||||||
|
FROM files f
|
||||||
|
JOIN metadata m ON f.hash = m.hash
|
||||||
|
WHERE m.url IS NOT NULL
|
||||||
|
AND TRIM(m.url) != ''
|
||||||
|
AND TRIM(m.url) != '[]'
|
||||||
|
LIMIT ?
|
||||||
|
""",
|
||||||
|
(limit or 10000,),
|
||||||
|
)
|
||||||
|
return {row[0] for row in cursor.fetchall()}
|
||||||
|
|
||||||
|
def get_file_hashes_by_url_like(self, like_pattern: str, limit: Optional[int] = None) -> Set[str]:
|
||||||
|
"""Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
|
||||||
|
cursor = self.get_cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT DISTINCT f.hash
|
||||||
|
FROM files f
|
||||||
|
JOIN metadata m ON f.hash = m.hash
|
||||||
|
WHERE m.url IS NOT NULL
|
||||||
|
AND LOWER(m.url) LIKE ?
|
||||||
|
LIMIT ?
|
||||||
|
""",
|
||||||
|
(like_pattern.lower(), limit or 10000),
|
||||||
|
)
|
||||||
|
return {row[0] for row in cursor.fetchall()}
|
||||||
|
|
||||||
|
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
|
||||||
|
"""Get files that have any non-empty URL metadata.
|
||||||
|
|
||||||
|
Returns (hash, file_path, size, ext) tuples.
|
||||||
|
"""
|
||||||
|
cursor = self.get_cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT f.hash, f.file_path,
|
||||||
|
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
||||||
|
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
|
||||||
|
FROM files f
|
||||||
|
JOIN metadata m ON f.hash = m.hash
|
||||||
|
WHERE m.url IS NOT NULL
|
||||||
|
AND TRIM(m.url) != ''
|
||||||
|
AND TRIM(m.url) != '[]'
|
||||||
|
ORDER BY f.file_path
|
||||||
|
LIMIT ?
|
||||||
|
""",
|
||||||
|
(limit or 10000,),
|
||||||
|
)
|
||||||
|
return cursor.fetchall()
|
||||||
|
|
||||||
|
def get_files_by_url_like(self, like_pattern: str, limit: Optional[int] = None) -> List[tuple]:
|
||||||
|
"""Get files whose URL metadata contains a substring (case-insensitive).
|
||||||
|
|
||||||
|
Returns (hash, file_path, size, ext) tuples.
|
||||||
|
"""
|
||||||
|
cursor = self.get_cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT f.hash, f.file_path,
|
||||||
|
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
||||||
|
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
|
||||||
|
FROM files f
|
||||||
|
JOIN metadata m ON f.hash = m.hash
|
||||||
|
WHERE m.url IS NOT NULL
|
||||||
|
AND LOWER(m.url) LIKE ?
|
||||||
|
ORDER BY f.file_path
|
||||||
|
LIMIT ?
|
||||||
|
""",
|
||||||
|
(like_pattern.lower(), limit or 10000),
|
||||||
|
)
|
||||||
|
return cursor.fetchall()
|
||||||
|
|
||||||
def get_file_metadata(self, file_hashes: Set[str], limit: Optional[int] = None) -> List[tuple]:
|
def get_file_metadata(self, file_hashes: Set[str], limit: Optional[int] = None) -> List[tuple]:
|
||||||
"""Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
|
"""Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
|
||||||
if not file_hashes:
|
if not file_hashes:
|
||||||
|
|||||||
35
CLI.py
35
CLI.py
@@ -1498,6 +1498,9 @@ def _execute_pipeline(tokens: list):
|
|||||||
elif table_type == 'soulseek':
|
elif table_type == 'soulseek':
|
||||||
print(f"Auto-piping Soulseek selection to download-file")
|
print(f"Auto-piping Soulseek selection to download-file")
|
||||||
stages.append(['download-file'])
|
stages.append(['download-file'])
|
||||||
|
elif table_type == 'openlibrary':
|
||||||
|
print(f"Auto-piping OpenLibrary selection to download-file")
|
||||||
|
stages.append(['download-file'])
|
||||||
elif source_cmd == 'search-file' and source_args and 'youtube' in source_args:
|
elif source_cmd == 'search-file' and source_args and 'youtube' in source_args:
|
||||||
# Legacy check
|
# Legacy check
|
||||||
print(f"Auto-piping YouTube selection to .pipe")
|
print(f"Auto-piping YouTube selection to .pipe")
|
||||||
@@ -1667,6 +1670,35 @@ def _execute_pipeline(tokens: list):
|
|||||||
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
|
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
|
||||||
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
|
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
|
||||||
print(f"Selected {len(filtered)} item(s) using {cmd_name}")
|
print(f"Selected {len(filtered)} item(s) using {cmd_name}")
|
||||||
|
|
||||||
|
# If selection is the last stage and looks like a provider result,
|
||||||
|
# auto-initiate the borrow/download flow.
|
||||||
|
if stage_index + 1 >= len(stages):
|
||||||
|
try:
|
||||||
|
from ProviderCore.registry import get_search_provider as _get_search_provider
|
||||||
|
except Exception:
|
||||||
|
_get_search_provider = None
|
||||||
|
|
||||||
|
if _get_search_provider is not None:
|
||||||
|
selected_list = filtered_pipe_objs
|
||||||
|
provider_table: Optional[str] = None
|
||||||
|
try:
|
||||||
|
for obj in selected_list:
|
||||||
|
extra = getattr(obj, "extra", None)
|
||||||
|
if isinstance(extra, dict) and extra.get("table"):
|
||||||
|
provider_table = str(extra.get("table"))
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
provider_table = None
|
||||||
|
|
||||||
|
if provider_table:
|
||||||
|
try:
|
||||||
|
provider = _get_search_provider(provider_table, config)
|
||||||
|
except Exception:
|
||||||
|
provider = None
|
||||||
|
if provider is not None:
|
||||||
|
print("Auto-downloading selection via download-file")
|
||||||
|
stages.append(["download-file"])
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
print(f"No items matched selection {cmd_name}\n")
|
print(f"No items matched selection {cmd_name}\n")
|
||||||
@@ -1736,13 +1768,14 @@ def _execute_pipeline(tokens: list):
|
|||||||
}
|
}
|
||||||
# Display-only commands (just show data, don't modify or search)
|
# Display-only commands (just show data, don't modify or search)
|
||||||
display_only_commands = {
|
display_only_commands = {
|
||||||
'get-url', 'get_url', 'get-note', 'get_note',
|
'get-note', 'get_note',
|
||||||
'get-relationship', 'get_relationship', 'get-file', 'get_file',
|
'get-relationship', 'get_relationship', 'get-file', 'get_file',
|
||||||
'check-file-status', 'check_file_status'
|
'check-file-status', 'check_file_status'
|
||||||
}
|
}
|
||||||
# Commands that manage their own table/history state (e.g. get-tag)
|
# Commands that manage their own table/history state (e.g. get-tag)
|
||||||
self_managing_commands = {
|
self_managing_commands = {
|
||||||
'get-tag', 'get_tag', 'tags',
|
'get-tag', 'get_tag', 'tags',
|
||||||
|
'get-url', 'get_url',
|
||||||
'search-file', 'search_file'
|
'search-file', 'search_file'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +1,38 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from concurrent import futures
|
||||||
|
import hashlib
|
||||||
|
import json as json_module
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from API.HTTP import HTTPClient
|
||||||
from ProviderCore.base import SearchProvider, SearchResult
|
from ProviderCore.base import SearchProvider, SearchResult
|
||||||
from ProviderCore.download import download_file, sanitize_filename
|
from ProviderCore.download import download_file, sanitize_filename
|
||||||
from cli_syntax import get_field, get_free_text, parse_query
|
from cli_syntax import get_field, get_free_text, parse_query
|
||||||
from SYS.logger import log
|
from SYS.logger import log
|
||||||
from SYS.utils import unique_path
|
from SYS.utils import unique_path
|
||||||
|
|
||||||
|
try:
|
||||||
|
from Crypto.Cipher import AES # type: ignore
|
||||||
|
from Crypto.Util import Counter # type: ignore
|
||||||
|
except ImportError:
|
||||||
|
AES = None # type: ignore
|
||||||
|
Counter = None # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from tqdm import tqdm # type: ignore
|
||||||
|
except ImportError:
|
||||||
|
tqdm = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
def _looks_like_isbn(text: str) -> bool:
|
def _looks_like_isbn(text: str) -> bool:
|
||||||
t = (text or "").replace("-", "").strip()
|
t = (text or "").replace("-", "").strip()
|
||||||
@@ -38,6 +57,13 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
|
|||||||
edition_key = doc.get("edition_key")
|
edition_key = doc.get("edition_key")
|
||||||
if isinstance(edition_key, list) and edition_key:
|
if isinstance(edition_key, list) and edition_key:
|
||||||
return str(edition_key[0]).strip()
|
return str(edition_key[0]).strip()
|
||||||
|
if isinstance(edition_key, str) and edition_key.strip():
|
||||||
|
return edition_key.strip()
|
||||||
|
|
||||||
|
# Often present even when edition_key is missing.
|
||||||
|
cover_edition_key = doc.get("cover_edition_key")
|
||||||
|
if isinstance(cover_edition_key, str) and cover_edition_key.strip():
|
||||||
|
return cover_edition_key.strip()
|
||||||
|
|
||||||
# Fallback: sometimes key can be /books/OL...M
|
# Fallback: sometimes key can be /books/OL...M
|
||||||
key = doc.get("key")
|
key = doc.get("key")
|
||||||
@@ -54,7 +80,7 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
|
|||||||
return False, "not-an-edition"
|
return False, "not-an-edition"
|
||||||
|
|
||||||
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
|
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
|
||||||
resp = session.get(url, timeout=10)
|
resp = session.get(url, timeout=6)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json() or {}
|
data = resp.json() or {}
|
||||||
wrapped = data.get(f"OLID:{edition_id}")
|
wrapped = data.get(f"OLID:{edition_id}")
|
||||||
@@ -88,7 +114,7 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate
|
|||||||
|
|
||||||
# Otherwise query the edition JSON.
|
# Otherwise query the edition JSON.
|
||||||
try:
|
try:
|
||||||
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10)
|
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json() or {}
|
data = resp.json() or {}
|
||||||
|
|
||||||
@@ -116,6 +142,522 @@ class OpenLibrary(SearchProvider):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self._session = requests.Session()
|
self._session = requests.Session()
|
||||||
|
|
||||||
|
class BookNotAvailableError(Exception):
|
||||||
|
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
"""Get Archive.org email/password from config.
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
||||||
|
- Old: {"Archive": {"email": "...", "password": "..."}}
|
||||||
|
{"archive_org_email": "...", "archive_org_password": "..."}
|
||||||
|
"""
|
||||||
|
if not isinstance(config, dict):
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
provider_config = config.get("provider", {})
|
||||||
|
if isinstance(provider_config, dict):
|
||||||
|
openlibrary_config = provider_config.get("openlibrary", {})
|
||||||
|
if isinstance(openlibrary_config, dict):
|
||||||
|
email = openlibrary_config.get("email")
|
||||||
|
password = openlibrary_config.get("password")
|
||||||
|
if email or password:
|
||||||
|
return str(email) if email is not None else None, str(password) if password is not None else None
|
||||||
|
|
||||||
|
archive_config = config.get("Archive")
|
||||||
|
if isinstance(archive_config, dict):
|
||||||
|
email = archive_config.get("email")
|
||||||
|
password = archive_config.get("password")
|
||||||
|
if email or password:
|
||||||
|
return str(email) if email is not None else None, str(password) if password is not None else None
|
||||||
|
|
||||||
|
email = config.get("archive_org_email")
|
||||||
|
password = config.get("archive_org_password")
|
||||||
|
return str(email) if email is not None else None, str(password) if password is not None else None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_error_body(response: requests.Response) -> str:
|
||||||
|
try:
|
||||||
|
body = response.text or ""
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
if len(body) > 2000:
|
||||||
|
return body[:1200] + "\n... (truncated) ...\n" + body[-400:]
|
||||||
|
return body
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _archive_login(cls, email: str, password: str) -> requests.Session:
|
||||||
|
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
|
||||||
|
session = requests.Session()
|
||||||
|
|
||||||
|
token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
|
||||||
|
try:
|
||||||
|
token_json = token_resp.json()
|
||||||
|
except Exception as exc:
|
||||||
|
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
|
||||||
|
|
||||||
|
if not token_json.get("success"):
|
||||||
|
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
|
||||||
|
|
||||||
|
token = (token_json.get("value") or {}).get("token")
|
||||||
|
if not token:
|
||||||
|
raise RuntimeError("Archive login token missing")
|
||||||
|
|
||||||
|
headers = {"Content-Type": "application/x-www-form-urlencoded"}
|
||||||
|
payload = {"username": email, "password": password, "t": token}
|
||||||
|
|
||||||
|
login_resp = session.post(
|
||||||
|
"https://archive.org/services/account/login/",
|
||||||
|
headers=headers,
|
||||||
|
data=json_module.dumps(payload),
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
login_json = login_resp.json()
|
||||||
|
except Exception as exc:
|
||||||
|
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
|
||||||
|
|
||||||
|
if login_json.get("success") is False:
|
||||||
|
if login_json.get("value") == "bad_login":
|
||||||
|
raise RuntimeError("Invalid Archive.org credentials")
|
||||||
|
raise RuntimeError(f"Archive login failed: {login_json}")
|
||||||
|
|
||||||
|
return session
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
|
||||||
|
data = {"action": "grant_access", "identifier": book_id}
|
||||||
|
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
|
||||||
|
data["action"] = "browse_book"
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
|
||||||
|
if response.status_code == 400:
|
||||||
|
try:
|
||||||
|
err = (response.json() or {}).get("error")
|
||||||
|
if err == "This book is not available to borrow at this time. Please try again later.":
|
||||||
|
raise cls.BookNotAvailableError("Book is waitlisted or in use")
|
||||||
|
raise RuntimeError(f"Borrow failed: {err or response.text}")
|
||||||
|
except cls.BookNotAvailableError:
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
raise RuntimeError("The book cannot be borrowed")
|
||||||
|
|
||||||
|
data["action"] = "create_token"
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
if "token" in (response.text or ""):
|
||||||
|
return session
|
||||||
|
raise RuntimeError("Something went wrong when trying to borrow the book")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_return_loan(session: requests.Session, book_id: str) -> None:
|
||||||
|
data = {"action": "return_loan", "identifier": book_id}
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
if (response.json() or {}).get("success"):
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
raise RuntimeError("Something went wrong when trying to return the book")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
||||||
|
"""Extract page links from Archive.org book reader."""
|
||||||
|
r = session.get(url, timeout=30).text
|
||||||
|
|
||||||
|
# Matches: "url":"//archive.org/..." (allow whitespace)
|
||||||
|
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
|
||||||
|
if not match:
|
||||||
|
raise RuntimeError("Failed to extract book info URL from response")
|
||||||
|
|
||||||
|
url_path = match.group(1)
|
||||||
|
infos_url = ("https:" + url_path) if url_path.startswith("//") else url_path
|
||||||
|
infos_url = infos_url.replace("\\u0026", "&")
|
||||||
|
|
||||||
|
response = session.get(infos_url, timeout=30)
|
||||||
|
payload = response.json()
|
||||||
|
data = payload["data"]
|
||||||
|
|
||||||
|
title = str(data["brOptions"]["bookTitle"]).strip().replace(" ", "_")
|
||||||
|
title = "".join(c for c in title if c not in '<>:"/\\|?*')
|
||||||
|
title = title[:150]
|
||||||
|
|
||||||
|
metadata = data.get("metadata") or {}
|
||||||
|
links: List[str] = []
|
||||||
|
br_data = (data.get("brOptions") or {}).get("data", [])
|
||||||
|
if isinstance(br_data, list):
|
||||||
|
for item in br_data:
|
||||||
|
if isinstance(item, list):
|
||||||
|
for page in item:
|
||||||
|
if isinstance(page, dict) and "uri" in page:
|
||||||
|
links.append(page["uri"])
|
||||||
|
elif isinstance(item, dict) and "uri" in item:
|
||||||
|
links.append(item["uri"])
|
||||||
|
|
||||||
|
if not links:
|
||||||
|
raise RuntimeError("No pages found in book data")
|
||||||
|
return title, links, metadata if isinstance(metadata, dict) else {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_image_name(pages: int, page: int, directory: str) -> str:
|
||||||
|
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
|
||||||
|
if not AES or not Counter:
|
||||||
|
raise RuntimeError("Crypto library not available")
|
||||||
|
|
||||||
|
try:
|
||||||
|
version, counter_b64 = obf_header.split("|")
|
||||||
|
except Exception as exc:
|
||||||
|
raise ValueError("Invalid X-Obfuscate header format") from exc
|
||||||
|
|
||||||
|
if version != "1":
|
||||||
|
raise ValueError("Unsupported obfuscation version: " + version)
|
||||||
|
|
||||||
|
aes_key = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
||||||
|
sha1_digest = hashlib.sha1(aes_key.encode("utf-8")).digest()
|
||||||
|
key = sha1_digest[:16]
|
||||||
|
|
||||||
|
counter_bytes = base64.b64decode(counter_b64)
|
||||||
|
if len(counter_bytes) != 16:
|
||||||
|
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
|
||||||
|
|
||||||
|
prefix = counter_bytes[:8]
|
||||||
|
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
||||||
|
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
|
||||||
|
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
||||||
|
|
||||||
|
decrypted_part = cipher.decrypt(image_data[:1024])
|
||||||
|
return decrypted_part + image_data[1024:]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _archive_download_one_image(
|
||||||
|
cls,
|
||||||
|
session: requests.Session,
|
||||||
|
link: str,
|
||||||
|
i: int,
|
||||||
|
directory: str,
|
||||||
|
book_id: str,
|
||||||
|
pages: int,
|
||||||
|
) -> None:
|
||||||
|
headers = {
|
||||||
|
"Referer": "https://archive.org/",
|
||||||
|
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||||
|
"Sec-Fetch-Site": "same-site",
|
||||||
|
"Sec-Fetch-Mode": "no-cors",
|
||||||
|
"Sec-Fetch-Dest": "image",
|
||||||
|
}
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
response = session.get(link, headers=headers, timeout=30)
|
||||||
|
if response.status_code == 403:
|
||||||
|
cls._archive_loan(session, book_id, verbose=False)
|
||||||
|
raise RuntimeError("Borrow again")
|
||||||
|
if response.status_code == 200:
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
image = cls._archive_image_name(pages, i, directory)
|
||||||
|
obf_header = response.headers.get("X-Obfuscate")
|
||||||
|
if obf_header:
|
||||||
|
image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
|
||||||
|
else:
|
||||||
|
image_content = response.content
|
||||||
|
|
||||||
|
with open(image, "wb") as f:
|
||||||
|
f.write(image_content)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _archive_download(
|
||||||
|
cls,
|
||||||
|
session: requests.Session,
|
||||||
|
n_threads: int,
|
||||||
|
directory: str,
|
||||||
|
links: List[str],
|
||||||
|
scale: int,
|
||||||
|
book_id: str,
|
||||||
|
) -> List[str]:
|
||||||
|
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
|
||||||
|
pages = len(links_scaled)
|
||||||
|
|
||||||
|
tasks = []
|
||||||
|
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
||||||
|
for i, link in enumerate(links_scaled):
|
||||||
|
tasks.append(
|
||||||
|
executor.submit(
|
||||||
|
cls._archive_download_one_image,
|
||||||
|
session=session,
|
||||||
|
link=link,
|
||||||
|
i=i,
|
||||||
|
directory=directory,
|
||||||
|
book_id=book_id,
|
||||||
|
pages=pages,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if tqdm:
|
||||||
|
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for _ in futures.as_completed(tasks):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return [cls._archive_image_name(pages, i, directory) for i in range(pages)]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _archive_check_direct_download(book_id: str) -> Tuple[bool, str]:
|
||||||
|
"""Check for a directly downloadable original PDF in Archive.org metadata."""
|
||||||
|
try:
|
||||||
|
metadata_url = f"https://archive.org/metadata/{book_id}"
|
||||||
|
response = requests.get(metadata_url, timeout=6)
|
||||||
|
response.raise_for_status()
|
||||||
|
metadata = response.json()
|
||||||
|
files = metadata.get("files") if isinstance(metadata, dict) else None
|
||||||
|
if isinstance(files, list):
|
||||||
|
for file_info in files:
|
||||||
|
if not isinstance(file_info, dict):
|
||||||
|
continue
|
||||||
|
filename = str(file_info.get("name", ""))
|
||||||
|
if filename.endswith(".pdf") and file_info.get("source") == "original":
|
||||||
|
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
|
||||||
|
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
|
||||||
|
if check_response.status_code == 200:
|
||||||
|
return True, pdf_url
|
||||||
|
return False, ""
|
||||||
|
except Exception:
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def scrape_isbn_metadata(isbn: str) -> List[str]:
|
||||||
|
"""Scrape tags for an ISBN using Open Library API.
|
||||||
|
|
||||||
|
Returns tags such as:
|
||||||
|
- title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
|
||||||
|
- identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
|
||||||
|
"""
|
||||||
|
new_tags: List[str] = []
|
||||||
|
|
||||||
|
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
|
||||||
|
if not isbn_clean:
|
||||||
|
return []
|
||||||
|
|
||||||
|
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
|
||||||
|
try:
|
||||||
|
with HTTPClient() as client:
|
||||||
|
response = client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = json_module.loads(response.content.decode("utf-8"))
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not data:
|
||||||
|
log(f"No ISBN metadata found for: {isbn}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
book_data = next(iter(data.values()), None)
|
||||||
|
if not isinstance(book_data, dict):
|
||||||
|
return []
|
||||||
|
|
||||||
|
if "title" in book_data:
|
||||||
|
new_tags.append(f"title:{book_data['title']}")
|
||||||
|
|
||||||
|
authors = book_data.get("authors")
|
||||||
|
if isinstance(authors, list):
|
||||||
|
for author in authors[:3]:
|
||||||
|
if isinstance(author, dict) and author.get("name"):
|
||||||
|
new_tags.append(f"author:{author['name']}")
|
||||||
|
|
||||||
|
if book_data.get("publish_date"):
|
||||||
|
new_tags.append(f"publish_date:{book_data['publish_date']}")
|
||||||
|
|
||||||
|
publishers = book_data.get("publishers")
|
||||||
|
if isinstance(publishers, list) and publishers:
|
||||||
|
pub = publishers[0]
|
||||||
|
if isinstance(pub, dict) and pub.get("name"):
|
||||||
|
new_tags.append(f"publisher:{pub['name']}")
|
||||||
|
|
||||||
|
if "description" in book_data:
|
||||||
|
desc = book_data.get("description")
|
||||||
|
if isinstance(desc, dict) and "value" in desc:
|
||||||
|
desc = desc.get("value")
|
||||||
|
if desc:
|
||||||
|
desc_str = str(desc).strip()
|
||||||
|
if desc_str:
|
||||||
|
new_tags.append(f"description:{desc_str[:200]}")
|
||||||
|
|
||||||
|
page_count = book_data.get("number_of_pages")
|
||||||
|
if isinstance(page_count, int) and page_count > 0:
|
||||||
|
new_tags.append(f"pages:{page_count}")
|
||||||
|
|
||||||
|
identifiers = book_data.get("identifiers")
|
||||||
|
if isinstance(identifiers, dict):
|
||||||
|
|
||||||
|
def _first(value: Any) -> Any:
|
||||||
|
if isinstance(value, list) and value:
|
||||||
|
return value[0]
|
||||||
|
return value
|
||||||
|
|
||||||
|
for key, ns in (
|
||||||
|
("openlibrary", "openlibrary"),
|
||||||
|
("lccn", "lccn"),
|
||||||
|
("oclc", "oclc"),
|
||||||
|
("goodreads", "goodreads"),
|
||||||
|
("librarything", "librarything"),
|
||||||
|
("doi", "doi"),
|
||||||
|
("internet_archive", "internet_archive"),
|
||||||
|
):
|
||||||
|
val = _first(identifiers.get(key))
|
||||||
|
if val:
|
||||||
|
new_tags.append(f"{ns}:{val}")
|
||||||
|
|
||||||
|
log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
|
||||||
|
return new_tags
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def scrape_openlibrary_metadata(olid: str) -> List[str]:
|
||||||
|
"""Scrape tags for an OpenLibrary ID using the .json API endpoint."""
|
||||||
|
new_tags: List[str] = []
|
||||||
|
|
||||||
|
olid_text = str(olid or "").strip()
|
||||||
|
if not olid_text:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Normalize OLID to the common "OL<digits>M" form when possible.
|
||||||
|
olid_norm = olid_text
|
||||||
|
try:
|
||||||
|
if not olid_norm.startswith("OL"):
|
||||||
|
olid_norm = f"OL{olid_norm}"
|
||||||
|
if not olid_norm.endswith("M"):
|
||||||
|
olid_norm = f"{olid_norm}M"
|
||||||
|
except Exception:
|
||||||
|
olid_norm = olid_text
|
||||||
|
|
||||||
|
# Ensure we always include a scrapeable identifier tag.
|
||||||
|
new_tags.append(f"openlibrary:{olid_norm}")
|
||||||
|
|
||||||
|
# Accept OL9674499M, 9674499M, or just digits.
|
||||||
|
olid_clean = olid_text.replace("OL", "").replace("M", "")
|
||||||
|
if not olid_clean.isdigit():
|
||||||
|
olid_clean = olid_text
|
||||||
|
|
||||||
|
if not olid_text.startswith("OL"):
|
||||||
|
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
|
||||||
|
else:
|
||||||
|
url = f"https://openlibrary.org/books/{olid_text}.json"
|
||||||
|
|
||||||
|
try:
|
||||||
|
with HTTPClient() as client:
|
||||||
|
response = client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = json_module.loads(response.content.decode("utf-8"))
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not isinstance(data, dict) or not data:
|
||||||
|
log(f"No OpenLibrary metadata found for: {olid_text}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
if "title" in data:
|
||||||
|
new_tags.append(f"title:{data['title']}")
|
||||||
|
|
||||||
|
authors = data.get("authors")
|
||||||
|
if isinstance(authors, list):
|
||||||
|
for author in authors[:3]:
|
||||||
|
if isinstance(author, dict) and author.get("name"):
|
||||||
|
new_tags.append(f"author:{author['name']}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
|
||||||
|
author_key = None
|
||||||
|
if isinstance(author, dict):
|
||||||
|
if isinstance(author.get("author"), dict):
|
||||||
|
author_key = author.get("author", {}).get("key")
|
||||||
|
if not author_key:
|
||||||
|
author_key = author.get("key")
|
||||||
|
|
||||||
|
if isinstance(author_key, str) and author_key.startswith("/"):
|
||||||
|
try:
|
||||||
|
author_url = f"https://openlibrary.org{author_key}.json"
|
||||||
|
with HTTPClient(timeout=10) as client:
|
||||||
|
author_resp = client.get(author_url)
|
||||||
|
author_resp.raise_for_status()
|
||||||
|
author_data = json_module.loads(author_resp.content.decode("utf-8"))
|
||||||
|
if isinstance(author_data, dict) and author_data.get("name"):
|
||||||
|
new_tags.append(f"author:{author_data['name']}")
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if isinstance(author, str) and author:
|
||||||
|
new_tags.append(f"author:{author}")
|
||||||
|
|
||||||
|
if data.get("publish_date"):
|
||||||
|
new_tags.append(f"publish_date:{data['publish_date']}")
|
||||||
|
|
||||||
|
publishers = data.get("publishers")
|
||||||
|
if isinstance(publishers, list) and publishers:
|
||||||
|
pub = publishers[0]
|
||||||
|
if isinstance(pub, dict) and pub.get("name"):
|
||||||
|
new_tags.append(f"publisher:{pub['name']}")
|
||||||
|
elif isinstance(pub, str) and pub:
|
||||||
|
new_tags.append(f"publisher:{pub}")
|
||||||
|
|
||||||
|
if "description" in data:
|
||||||
|
desc = data.get("description")
|
||||||
|
if isinstance(desc, dict) and "value" in desc:
|
||||||
|
desc = desc.get("value")
|
||||||
|
if desc:
|
||||||
|
desc_str = str(desc).strip()
|
||||||
|
if desc_str:
|
||||||
|
new_tags.append(f"description:{desc_str[:200]}")
|
||||||
|
|
||||||
|
page_count = data.get("number_of_pages")
|
||||||
|
if isinstance(page_count, int) and page_count > 0:
|
||||||
|
new_tags.append(f"pages:{page_count}")
|
||||||
|
|
||||||
|
subjects = data.get("subjects")
|
||||||
|
if isinstance(subjects, list):
|
||||||
|
for subject in subjects[:10]:
|
||||||
|
if isinstance(subject, str):
|
||||||
|
subject_clean = subject.strip()
|
||||||
|
if subject_clean and subject_clean not in new_tags:
|
||||||
|
new_tags.append(subject_clean)
|
||||||
|
|
||||||
|
identifiers = data.get("identifiers")
|
||||||
|
if isinstance(identifiers, dict):
|
||||||
|
|
||||||
|
def _first(value: Any) -> Any:
|
||||||
|
if isinstance(value, list) and value:
|
||||||
|
return value[0]
|
||||||
|
return value
|
||||||
|
|
||||||
|
for key, ns in (
|
||||||
|
("isbn_10", "isbn_10"),
|
||||||
|
("isbn_13", "isbn_13"),
|
||||||
|
("lccn", "lccn"),
|
||||||
|
("oclc_numbers", "oclc"),
|
||||||
|
("goodreads", "goodreads"),
|
||||||
|
("internet_archive", "internet_archive"),
|
||||||
|
):
|
||||||
|
val = _first(identifiers.get(key))
|
||||||
|
if val:
|
||||||
|
new_tags.append(f"{ns}:{val}")
|
||||||
|
|
||||||
|
# Some editions expose a direct Archive.org identifier as "ocaid".
|
||||||
|
ocaid = data.get("ocaid")
|
||||||
|
if isinstance(ocaid, str) and ocaid.strip():
|
||||||
|
new_tags.append(f"internet_archive:{ocaid.strip()}")
|
||||||
|
|
||||||
|
log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
|
||||||
|
return new_tags
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@@ -155,7 +697,70 @@ class OpenLibrary(SearchProvider):
|
|||||||
if not isinstance(docs, list):
|
if not isinstance(docs, list):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
for doc in docs[: int(limit)]:
|
# Availability enrichment can be slow if done sequentially (it may require multiple
|
||||||
|
# network calls per row). Do it concurrently to keep the pipeline responsive.
|
||||||
|
docs = docs[: int(limit)]
|
||||||
|
|
||||||
|
def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
|
||||||
|
edition_id_local = _resolve_edition_id(doc_dict)
|
||||||
|
if not edition_id_local:
|
||||||
|
return "no-olid", "", "", ""
|
||||||
|
|
||||||
|
ia_val_local = doc_dict.get("ia") or []
|
||||||
|
if isinstance(ia_val_local, str):
|
||||||
|
ia_val_local = [ia_val_local]
|
||||||
|
if not isinstance(ia_val_local, list):
|
||||||
|
ia_val_local = []
|
||||||
|
ia_ids_local = [str(x) for x in ia_val_local if x]
|
||||||
|
|
||||||
|
session_local = requests.Session()
|
||||||
|
|
||||||
|
try:
|
||||||
|
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
|
||||||
|
except Exception:
|
||||||
|
archive_id_local = ""
|
||||||
|
|
||||||
|
if not archive_id_local:
|
||||||
|
return "no-archive", "", "", ""
|
||||||
|
|
||||||
|
# Prefer the fastest signal first: OpenLibrary lendable status.
|
||||||
|
lendable_local, reason_local = _check_lendable(session_local, edition_id_local)
|
||||||
|
if lendable_local:
|
||||||
|
return "borrow", reason_local, archive_id_local, ""
|
||||||
|
|
||||||
|
# Not lendable: check whether it's directly downloadable (public domain uploads, etc.).
|
||||||
|
try:
|
||||||
|
can_direct, pdf_url = self._archive_check_direct_download(archive_id_local)
|
||||||
|
if can_direct and pdf_url:
|
||||||
|
return "download", reason_local, archive_id_local, str(pdf_url)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return "unavailable", reason_local, archive_id_local, ""
|
||||||
|
|
||||||
|
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
|
||||||
|
if docs:
|
||||||
|
log(f"[openlibrary] Enriching availability for {len(docs)} result(s)...")
|
||||||
|
max_workers = min(8, max(1, len(docs)))
|
||||||
|
done = 0
|
||||||
|
with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
|
future_to_index = {
|
||||||
|
executor.submit(_compute_availability, doc_dict): i
|
||||||
|
for i, doc_dict in enumerate(docs)
|
||||||
|
if isinstance(doc_dict, dict)
|
||||||
|
}
|
||||||
|
for fut in futures.as_completed(list(future_to_index.keys())):
|
||||||
|
i = future_to_index[fut]
|
||||||
|
try:
|
||||||
|
availability_rows[i] = fut.result()
|
||||||
|
except Exception:
|
||||||
|
availability_rows[i] = ("unknown", "", "", "")
|
||||||
|
done += 1
|
||||||
|
if done in {1, len(future_to_index)} or (done % 10 == 0):
|
||||||
|
log(f"[openlibrary] Availability: {done}/{len(future_to_index)}")
|
||||||
|
log("[openlibrary] Availability enrichment complete")
|
||||||
|
|
||||||
|
for idx, doc in enumerate(docs):
|
||||||
if not isinstance(doc, dict):
|
if not isinstance(doc, dict):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -172,6 +777,7 @@ class OpenLibrary(SearchProvider):
|
|||||||
year = str(year_val) if year_val is not None else ""
|
year = str(year_val) if year_val is not None else ""
|
||||||
|
|
||||||
edition_id = _resolve_edition_id(doc)
|
edition_id = _resolve_edition_id(doc)
|
||||||
|
work_key = doc.get("key") if isinstance(doc.get("key"), str) else ""
|
||||||
|
|
||||||
ia_val = doc.get("ia") or []
|
ia_val = doc.get("ia") or []
|
||||||
if isinstance(ia_val, str):
|
if isinstance(ia_val, str):
|
||||||
@@ -193,9 +799,21 @@ class OpenLibrary(SearchProvider):
|
|||||||
("Title", book_title),
|
("Title", book_title),
|
||||||
("Author", ", ".join(authors_list)),
|
("Author", ", ".join(authors_list)),
|
||||||
("Year", year),
|
("Year", year),
|
||||||
|
("Avail", ""),
|
||||||
("OLID", edition_id),
|
("OLID", edition_id),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Determine availability using the concurrently computed enrichment.
|
||||||
|
availability, availability_reason, archive_id, direct_url = ("unknown", "", "", "")
|
||||||
|
if 0 <= idx < len(availability_rows):
|
||||||
|
availability, availability_reason, archive_id, direct_url = availability_rows[idx]
|
||||||
|
|
||||||
|
# Patch the display column.
|
||||||
|
for idx, (name, _val) in enumerate(columns):
|
||||||
|
if name == "Avail":
|
||||||
|
columns[idx] = ("Avail", availability)
|
||||||
|
break
|
||||||
|
|
||||||
annotations: List[str] = []
|
annotations: List[str] = []
|
||||||
if isbn_13:
|
if isbn_13:
|
||||||
annotations.append(f"isbn_13:{isbn_13}")
|
annotations.append(f"isbn_13:{isbn_13}")
|
||||||
@@ -203,12 +821,18 @@ class OpenLibrary(SearchProvider):
|
|||||||
annotations.append(f"isbn_10:{isbn_10}")
|
annotations.append(f"isbn_10:{isbn_10}")
|
||||||
if ia_ids:
|
if ia_ids:
|
||||||
annotations.append("archive")
|
annotations.append("archive")
|
||||||
|
if availability in {"download", "borrow"}:
|
||||||
|
annotations.append(availability)
|
||||||
|
|
||||||
results.append(
|
results.append(
|
||||||
SearchResult(
|
SearchResult(
|
||||||
table="openlibrary",
|
table="openlibrary",
|
||||||
title=book_title,
|
title=book_title,
|
||||||
path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"),
|
path=(
|
||||||
|
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
|
||||||
|
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
|
||||||
|
)
|
||||||
|
),
|
||||||
detail=(
|
detail=(
|
||||||
(f"By: {', '.join(authors_list)}" if authors_list else "")
|
(f"By: {', '.join(authors_list)}" if authors_list else "")
|
||||||
+ (f" ({year})" if year else "")
|
+ (f" ({year})" if year else "")
|
||||||
@@ -218,11 +842,16 @@ class OpenLibrary(SearchProvider):
|
|||||||
columns=columns,
|
columns=columns,
|
||||||
full_metadata={
|
full_metadata={
|
||||||
"openlibrary_id": edition_id,
|
"openlibrary_id": edition_id,
|
||||||
|
"openlibrary_key": work_key,
|
||||||
"authors": authors_list,
|
"authors": authors_list,
|
||||||
"year": year,
|
"year": year,
|
||||||
"isbn_10": isbn_10,
|
"isbn_10": isbn_10,
|
||||||
"isbn_13": isbn_13,
|
"isbn_13": isbn_13,
|
||||||
"ia": ia_ids,
|
"ia": ia_ids,
|
||||||
|
"availability": availability,
|
||||||
|
"availability_reason": availability_reason,
|
||||||
|
"archive_id": archive_id,
|
||||||
|
"direct_url": direct_url,
|
||||||
"raw": doc,
|
"raw": doc,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -256,9 +885,7 @@ class OpenLibrary(SearchProvider):
|
|||||||
|
|
||||||
# 1) Direct download if available.
|
# 1) Direct download if available.
|
||||||
try:
|
try:
|
||||||
from API.archive_client import check_direct_download
|
can_direct, pdf_url = self._archive_check_direct_download(archive_id)
|
||||||
|
|
||||||
can_direct, pdf_url = check_direct_download(archive_id)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
can_direct, pdf_url = False, ""
|
can_direct, pdf_url = False, ""
|
||||||
|
|
||||||
@@ -272,10 +899,7 @@ class OpenLibrary(SearchProvider):
|
|||||||
|
|
||||||
# 2) Borrow flow (credentials required).
|
# 2) Borrow flow (credentials required).
|
||||||
try:
|
try:
|
||||||
from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download
|
email, password = self._credential_archive(self.config or {})
|
||||||
from API.archive_client import get_book_infos, loan, login
|
|
||||||
|
|
||||||
email, password = credential_openlibrary(self.config or {})
|
|
||||||
if not email or not password:
|
if not email or not password:
|
||||||
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
|
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
|
||||||
return None
|
return None
|
||||||
@@ -285,13 +909,13 @@ class OpenLibrary(SearchProvider):
|
|||||||
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
|
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
session = login(email, password)
|
session = self._archive_login(email, password)
|
||||||
try:
|
try:
|
||||||
session = loan(session, archive_id, verbose=False)
|
session = self._archive_loan(session, archive_id, verbose=False)
|
||||||
except BookNotAvailableError:
|
except self.BookNotAvailableError:
|
||||||
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
||||||
return None
|
return None
|
||||||
except SystemExit:
|
except Exception:
|
||||||
log("[openlibrary] Borrow failed", file=sys.stderr)
|
log("[openlibrary] Borrow failed", file=sys.stderr)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -301,7 +925,7 @@ class OpenLibrary(SearchProvider):
|
|||||||
last_exc: Optional[Exception] = None
|
last_exc: Optional[Exception] = None
|
||||||
for u in urls:
|
for u in urls:
|
||||||
try:
|
try:
|
||||||
title_raw, links, _metadata = get_book_infos(session, u)
|
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
|
||||||
if title_raw:
|
if title_raw:
|
||||||
title = sanitize_filename(title_raw)
|
title = sanitize_filename(title_raw)
|
||||||
break
|
break
|
||||||
@@ -315,7 +939,7 @@ class OpenLibrary(SearchProvider):
|
|||||||
|
|
||||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
||||||
try:
|
try:
|
||||||
images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
|
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import img2pdf # type: ignore
|
import img2pdf # type: ignore
|
||||||
|
|||||||
@@ -642,7 +642,7 @@ def _download_direct_file(
|
|||||||
return DownloadMediaResult(
|
return DownloadMediaResult(
|
||||||
path=file_path,
|
path=file_path,
|
||||||
info=info,
|
info=info,
|
||||||
tags=tags,
|
tag=tags,
|
||||||
source_url=url,
|
source_url=url,
|
||||||
hash_value=hash_value,
|
hash_value=hash_value,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ mime_maps = {
|
|||||||
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
||||||
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
||||||
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
||||||
|
"opus": { "ext": ".opus", "mimes": ["audio/opus"] },
|
||||||
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
||||||
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
||||||
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
||||||
@@ -98,3 +99,13 @@ def get_type_from_ext(ext: str) -> str:
|
|||||||
return type_name
|
return type_name
|
||||||
|
|
||||||
return 'other'
|
return 'other'
|
||||||
|
|
||||||
|
|
||||||
|
# Canonical supported extension set for all stores/cmdlets.
|
||||||
|
# Derived from mime_maps so there is a single source of truth.
|
||||||
|
ALL_SUPPORTED_EXTENSIONS: set[str] = {
|
||||||
|
spec["ext"].lower()
|
||||||
|
for group in mime_maps.values()
|
||||||
|
for spec in group.values()
|
||||||
|
if isinstance(spec, dict) and isinstance(spec.get("ext"), str) and spec.get("ext")
|
||||||
|
}
|
||||||
|
|||||||
175
Store/Folder.py
175
Store/Folder.py
@@ -30,6 +30,8 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
|
|||||||
return _normalize_hash(file_path.stem)
|
return _normalize_hash(file_path.stem)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Folder(Store):
|
class Folder(Store):
|
||||||
""""""
|
""""""
|
||||||
# Track which locations have already been migrated to avoid repeated migrations
|
# Track which locations have already been migrated to avoid repeated migrations
|
||||||
@@ -360,6 +362,17 @@ class Folder(Store):
|
|||||||
shutil.copy2(str(file_path), str(save_file))
|
shutil.copy2(str(file_path), str(save_file))
|
||||||
debug(f"Local copy: {save_file}", file=sys.stderr)
|
debug(f"Local copy: {save_file}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Best-effort: capture duration for media
|
||||||
|
duration_value: float | None = None
|
||||||
|
try:
|
||||||
|
from SYS.utils import ffprobe
|
||||||
|
probe = ffprobe(str(save_file))
|
||||||
|
duration = probe.get("duration")
|
||||||
|
if isinstance(duration, (int, float)) and duration > 0:
|
||||||
|
duration_value = float(duration)
|
||||||
|
except Exception:
|
||||||
|
duration_value = None
|
||||||
|
|
||||||
# Save to database
|
# Save to database
|
||||||
with API_folder_store(Path(self._location)) as db:
|
with API_folder_store(Path(self._location)) as db:
|
||||||
db.get_or_create_file_entry(save_file)
|
db.get_or_create_file_entry(save_file)
|
||||||
@@ -368,7 +381,8 @@ class Folder(Store):
|
|||||||
db.save_metadata(save_file, {
|
db.save_metadata(save_file, {
|
||||||
'hash': file_hash,
|
'hash': file_hash,
|
||||||
'ext': ext_clean,
|
'ext': ext_clean,
|
||||||
'size': file_path.stat().st_size
|
'size': file_path.stat().st_size,
|
||||||
|
'duration': duration_value,
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add tags if provided
|
# Add tags if provided
|
||||||
@@ -405,6 +419,21 @@ class Folder(Store):
|
|||||||
results = []
|
results = []
|
||||||
search_dir = Path(self._location).expanduser()
|
search_dir = Path(self._location).expanduser()
|
||||||
|
|
||||||
|
def _url_like_pattern(value: str) -> str:
|
||||||
|
# Interpret user patterns as substring matches (with optional glob wildcards).
|
||||||
|
v = (value or "").strip().lower()
|
||||||
|
if not v or v == "*":
|
||||||
|
return "%"
|
||||||
|
v = v.replace("%", "\\%").replace("_", "\\_")
|
||||||
|
v = v.replace("*", "%").replace("?", "_")
|
||||||
|
if "%" not in v and "_" not in v:
|
||||||
|
return f"%{v}%"
|
||||||
|
if not v.startswith("%"):
|
||||||
|
v = "%" + v
|
||||||
|
if not v.endswith("%"):
|
||||||
|
v = v + "%"
|
||||||
|
return v
|
||||||
|
|
||||||
tokens = [t.strip() for t in query.split(',') if t.strip()]
|
tokens = [t.strip() for t in query.split(',') if t.strip()]
|
||||||
|
|
||||||
if not match_all and len(tokens) == 1 and _normalize_hash(query):
|
if not match_all and len(tokens) == 1 and _normalize_hash(query):
|
||||||
@@ -453,6 +482,8 @@ class Folder(Store):
|
|||||||
try:
|
try:
|
||||||
with DatabaseAPI(search_dir) as api:
|
with DatabaseAPI(search_dir) as api:
|
||||||
if tokens and len(tokens) > 1:
|
if tokens and len(tokens) > 1:
|
||||||
|
url_fetch_limit = (limit or 45) * 50
|
||||||
|
|
||||||
def _like_pattern(term: str) -> str:
|
def _like_pattern(term: str) -> str:
|
||||||
return term.replace('*', '%').replace('?', '_')
|
return term.replace('*', '%').replace('?', '_')
|
||||||
|
|
||||||
@@ -473,6 +504,11 @@ class Folder(Store):
|
|||||||
h = api.get_file_hash_by_hash(normalized_hash)
|
h = api.get_file_hash_by_hash(normalized_hash)
|
||||||
return {h} if h else set()
|
return {h} if h else set()
|
||||||
|
|
||||||
|
if namespace == 'url':
|
||||||
|
if not pattern or pattern == '*':
|
||||||
|
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
|
||||||
|
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
|
||||||
|
|
||||||
if namespace == 'store':
|
if namespace == 'store':
|
||||||
if pattern not in {'local', 'file', 'filesystem'}:
|
if pattern not in {'local', 'file', 'filesystem'}:
|
||||||
return set()
|
return set()
|
||||||
@@ -563,6 +599,29 @@ class Folder(Store):
|
|||||||
return results
|
return results
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
if namespace == "url":
|
||||||
|
if not pattern or pattern == "*":
|
||||||
|
rows = api.get_files_with_any_url(limit)
|
||||||
|
else:
|
||||||
|
rows = api.get_files_by_url_like(_url_like_pattern(pattern), limit)
|
||||||
|
for file_hash, file_path_str, size_bytes, ext in rows:
|
||||||
|
if not file_path_str:
|
||||||
|
continue
|
||||||
|
file_path = Path(file_path_str)
|
||||||
|
if not file_path.exists():
|
||||||
|
continue
|
||||||
|
if size_bytes is None:
|
||||||
|
try:
|
||||||
|
size_bytes = file_path.stat().st_size
|
||||||
|
except OSError:
|
||||||
|
size_bytes = None
|
||||||
|
tags = api.get_tags_for_file(file_hash)
|
||||||
|
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
||||||
|
results.append(entry)
|
||||||
|
if limit is not None and len(results) >= limit:
|
||||||
|
return results
|
||||||
|
return results
|
||||||
|
|
||||||
query_pattern = f"{namespace}:%"
|
query_pattern = f"{namespace}:%"
|
||||||
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
|
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
|
||||||
debug(f"Found {len(rows)} potential matches in DB")
|
debug(f"Found {len(rows)} potential matches in DB")
|
||||||
@@ -592,84 +651,37 @@ class Folder(Store):
|
|||||||
if limit is not None and len(results) >= limit:
|
if limit is not None and len(results) >= limit:
|
||||||
return results
|
return results
|
||||||
elif not match_all:
|
elif not match_all:
|
||||||
|
# Strict tag-based search only (no filename/path searching).
|
||||||
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
|
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
|
||||||
if not terms:
|
if not terms:
|
||||||
terms = [query_lower]
|
terms = [query_lower]
|
||||||
|
|
||||||
debug(f"Performing filename/tag search for terms: {terms}")
|
|
||||||
|
|
||||||
fetch_limit = (limit or 45) * 50
|
fetch_limit = (limit or 45) * 50
|
||||||
|
|
||||||
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
|
# AND semantics across terms: each term must match at least one tag.
|
||||||
params = [f"%{t}%" for t in terms]
|
hits: dict[str, dict[str, Any]] = {}
|
||||||
|
|
||||||
rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
|
|
||||||
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
|
|
||||||
|
|
||||||
word_regex = None
|
|
||||||
if len(terms) == 1:
|
|
||||||
term = terms[0]
|
|
||||||
has_wildcard = '*' in term or '?' in term
|
|
||||||
|
|
||||||
if has_wildcard:
|
|
||||||
try:
|
|
||||||
from fnmatch import translate
|
|
||||||
word_regex = re.compile(translate(term), re.IGNORECASE)
|
|
||||||
except Exception:
|
|
||||||
word_regex = None
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
|
|
||||||
word_regex = re.compile(pattern, re.IGNORECASE)
|
|
||||||
except Exception:
|
|
||||||
word_regex = None
|
|
||||||
|
|
||||||
seen_files = set()
|
|
||||||
for file_id, file_path_str, size_bytes, file_hash in rows:
|
|
||||||
if not file_path_str or file_path_str in seen_files:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if word_regex:
|
|
||||||
p = Path(file_path_str)
|
|
||||||
if not word_regex.search(p.name):
|
|
||||||
continue
|
|
||||||
seen_files.add(file_path_str)
|
|
||||||
|
|
||||||
file_path = Path(file_path_str)
|
|
||||||
if file_path.exists():
|
|
||||||
if size_bytes is None:
|
|
||||||
size_bytes = file_path.stat().st_size
|
|
||||||
|
|
||||||
tags = api.get_tags_for_file(file_hash)
|
|
||||||
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
|
||||||
results.append(entry)
|
|
||||||
if limit is not None and len(results) >= limit:
|
|
||||||
return results
|
|
||||||
|
|
||||||
if terms:
|
|
||||||
title_hits: dict[str, dict[str, Any]] = {}
|
|
||||||
for term in terms:
|
for term in terms:
|
||||||
title_pattern = f"title:%{term}%"
|
tag_pattern = f"%{term}%"
|
||||||
title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
|
term_rows = api.get_files_by_namespace_pattern(tag_pattern, fetch_limit)
|
||||||
for file_hash, file_path_str, size_bytes, ext in title_rows:
|
for file_hash, file_path_str, size_bytes, ext in term_rows:
|
||||||
if not file_path_str:
|
if not file_path_str:
|
||||||
continue
|
continue
|
||||||
entry = title_hits.get(file_hash)
|
entry = hits.get(file_hash)
|
||||||
if entry:
|
if entry:
|
||||||
entry["count"] += 1
|
entry["count"] += 1
|
||||||
if size_bytes is not None:
|
if size_bytes is not None:
|
||||||
entry["size"] = size_bytes
|
entry["size"] = size_bytes
|
||||||
else:
|
else:
|
||||||
title_hits[file_hash] = {
|
hits[file_hash] = {
|
||||||
"path": file_path_str,
|
"path": file_path_str,
|
||||||
"size": size_bytes,
|
"size": size_bytes,
|
||||||
"hash": file_hash,
|
"hash": file_hash,
|
||||||
"count": 1,
|
"count": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
if title_hits:
|
|
||||||
required = len(terms)
|
required = len(terms)
|
||||||
for file_hash, info in title_hits.items():
|
seen_files: set[str] = set()
|
||||||
|
for file_hash, info in hits.items():
|
||||||
if info.get("count") != required:
|
if info.get("count") != required:
|
||||||
continue
|
continue
|
||||||
file_path_str = info.get("path")
|
file_path_str = info.get("path")
|
||||||
@@ -688,30 +700,10 @@ class Folder(Store):
|
|||||||
size_bytes = None
|
size_bytes = None
|
||||||
|
|
||||||
tags = api.get_tags_for_file(file_hash)
|
tags = api.get_tags_for_file(file_hash)
|
||||||
entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
|
entry_obj = _create_entry(file_path, tags, size_bytes, info.get("hash"))
|
||||||
results.append(entry)
|
results.append(entry_obj)
|
||||||
if limit is not None and len(results) >= limit:
|
if limit is not None and len(results) >= limit:
|
||||||
return results
|
break
|
||||||
|
|
||||||
query_pattern = f"%{query_lower}%"
|
|
||||||
tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)
|
|
||||||
|
|
||||||
for file_hash, file_path_str, size_bytes, ext in tag_rows:
|
|
||||||
if not file_path_str or file_path_str in seen_files:
|
|
||||||
continue
|
|
||||||
seen_files.add(file_path_str)
|
|
||||||
|
|
||||||
file_path = Path(file_path_str)
|
|
||||||
if file_path.exists():
|
|
||||||
if size_bytes is None:
|
|
||||||
size_bytes = file_path.stat().st_size
|
|
||||||
|
|
||||||
tags = api.get_tags_for_file(file_hash)
|
|
||||||
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
|
||||||
results.append(entry)
|
|
||||||
|
|
||||||
if limit is not None and len(results) >= limit:
|
|
||||||
return results
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
rows = api.get_all_files(limit)
|
rows = api.get_all_files(limit)
|
||||||
@@ -726,10 +718,8 @@ class Folder(Store):
|
|||||||
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
||||||
results.append(entry)
|
results.append(entry)
|
||||||
|
|
||||||
if results:
|
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
|
||||||
debug(f"Returning {len(results)} results from DB")
|
debug(f"[folder:{backend_label}] {len(results)} result(s)")
|
||||||
else:
|
|
||||||
debug("No results found in DB")
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -938,9 +928,11 @@ class Folder(Store):
|
|||||||
file_hash = file_identifier
|
file_hash = file_identifier
|
||||||
if self._location:
|
if self._location:
|
||||||
try:
|
try:
|
||||||
|
from metadata import normalize_urls
|
||||||
with API_folder_store(Path(self._location)) as db:
|
with API_folder_store(Path(self._location)) as db:
|
||||||
meta = db.get_metadata(file_hash) or {}
|
meta = db.get_metadata(file_hash) or {}
|
||||||
return list(meta.get("url") or [])
|
urls = normalize_urls(meta.get("url"))
|
||||||
|
return urls
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
debug(f"Local DB get_metadata failed: {exc}")
|
debug(f"Local DB get_metadata failed: {exc}")
|
||||||
return []
|
return []
|
||||||
@@ -955,11 +947,13 @@ class Folder(Store):
|
|||||||
file_hash = file_identifier
|
file_hash = file_identifier
|
||||||
if self._location:
|
if self._location:
|
||||||
try:
|
try:
|
||||||
|
from metadata import normalize_urls
|
||||||
with API_folder_store(Path(self._location)) as db:
|
with API_folder_store(Path(self._location)) as db:
|
||||||
meta = db.get_metadata(file_hash) or {}
|
meta = db.get_metadata(file_hash) or {}
|
||||||
existing_urls = list(meta.get("url") or [])
|
existing_urls = normalize_urls(meta.get("url"))
|
||||||
|
incoming_urls = normalize_urls(url)
|
||||||
changed = False
|
changed = False
|
||||||
for u in list(url or []):
|
for u in list(incoming_urls or []):
|
||||||
if not u:
|
if not u:
|
||||||
continue
|
continue
|
||||||
if u not in existing_urls:
|
if u not in existing_urls:
|
||||||
@@ -982,10 +976,11 @@ class Folder(Store):
|
|||||||
file_hash = file_identifier
|
file_hash = file_identifier
|
||||||
if self._location:
|
if self._location:
|
||||||
try:
|
try:
|
||||||
|
from metadata import normalize_urls
|
||||||
with API_folder_store(Path(self._location)) as db:
|
with API_folder_store(Path(self._location)) as db:
|
||||||
meta = db.get_metadata(file_hash) or {}
|
meta = db.get_metadata(file_hash) or {}
|
||||||
existing_urls = list(meta.get("url") or [])
|
existing_urls = normalize_urls(meta.get("url"))
|
||||||
remove_set = {u for u in (url or []) if u}
|
remove_set = {u for u in normalize_urls(url) if u}
|
||||||
if not remove_set:
|
if not remove_set:
|
||||||
return False
|
return False
|
||||||
new_urls = [u for u in existing_urls if u not in remove_set]
|
new_urls = [u for u in existing_urls if u not in remove_set]
|
||||||
|
|||||||
@@ -264,6 +264,170 @@ class HydrusNetwork(Store):
|
|||||||
|
|
||||||
debug(f"Searching Hydrus for: {query}")
|
debug(f"Searching Hydrus for: {query}")
|
||||||
|
|
||||||
|
def _extract_urls(meta_obj: Any) -> list[str]:
|
||||||
|
if not isinstance(meta_obj, dict):
|
||||||
|
return []
|
||||||
|
raw = meta_obj.get("url")
|
||||||
|
if raw is None:
|
||||||
|
raw = meta_obj.get("urls")
|
||||||
|
if isinstance(raw, str):
|
||||||
|
val = raw.strip()
|
||||||
|
return [val] if val else []
|
||||||
|
if isinstance(raw, list):
|
||||||
|
out: list[str] = []
|
||||||
|
for item in raw:
|
||||||
|
if not isinstance(item, str):
|
||||||
|
continue
|
||||||
|
s = item.strip()
|
||||||
|
if s:
|
||||||
|
out.append(s)
|
||||||
|
return out
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
|
||||||
|
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
||||||
|
|
||||||
|
# First try a fast system predicate if Hydrus supports it.
|
||||||
|
candidate_file_ids: list[int] = []
|
||||||
|
try:
|
||||||
|
if want_any:
|
||||||
|
predicate = "system:has url"
|
||||||
|
url_search = client.search_files(
|
||||||
|
tags=[predicate],
|
||||||
|
return_hashes=False,
|
||||||
|
return_file_ids=True,
|
||||||
|
return_file_count=False,
|
||||||
|
)
|
||||||
|
ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
|
||||||
|
if isinstance(ids, list):
|
||||||
|
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit()]
|
||||||
|
except Exception:
|
||||||
|
candidate_file_ids = []
|
||||||
|
|
||||||
|
if not candidate_file_ids:
|
||||||
|
# Fallback: scan from system:everything and filter by URL substring.
|
||||||
|
everything = client.search_files(
|
||||||
|
tags=["system:everything"],
|
||||||
|
return_hashes=False,
|
||||||
|
return_file_ids=True,
|
||||||
|
return_file_count=False,
|
||||||
|
)
|
||||||
|
ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
|
||||||
|
if isinstance(ids, list):
|
||||||
|
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float))]
|
||||||
|
|
||||||
|
if not candidate_file_ids:
|
||||||
|
return []
|
||||||
|
|
||||||
|
needle = (url_value or "").strip().lower()
|
||||||
|
chunk_size = 200
|
||||||
|
out: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
for start in range(0, len(candidate_file_ids), chunk_size):
|
||||||
|
if len(out) >= fetch_limit:
|
||||||
|
break
|
||||||
|
chunk = candidate_file_ids[start : start + chunk_size]
|
||||||
|
try:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
file_ids=chunk,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
|
||||||
|
if not isinstance(metas, list):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for meta in metas:
|
||||||
|
if not isinstance(meta, dict):
|
||||||
|
continue
|
||||||
|
urls = _extract_urls(meta)
|
||||||
|
if not urls:
|
||||||
|
continue
|
||||||
|
if want_any:
|
||||||
|
out.append(meta)
|
||||||
|
if len(out) >= fetch_limit:
|
||||||
|
break
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not needle:
|
||||||
|
continue
|
||||||
|
if any(needle in u.lower() for u in urls):
|
||||||
|
out.append(meta)
|
||||||
|
if len(out) >= fetch_limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
query_lower = query.lower().strip()
|
||||||
|
|
||||||
|
# Special case: url:* and url:<value>
|
||||||
|
metadata_list: list[dict[str, Any]] | None = None
|
||||||
|
if ":" in query_lower and not query_lower.startswith(":"):
|
||||||
|
namespace, pattern = query_lower.split(":", 1)
|
||||||
|
namespace = namespace.strip().lower()
|
||||||
|
pattern = pattern.strip()
|
||||||
|
if namespace == "url":
|
||||||
|
if not pattern or pattern == "*":
|
||||||
|
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
|
||||||
|
else:
|
||||||
|
# Fast-path: exact URL via /add_url/get_url_files when a full URL is provided.
|
||||||
|
try:
|
||||||
|
if pattern.startswith("http://") or pattern.startswith("https://"):
|
||||||
|
from API.HydrusNetwork import HydrusRequestSpec
|
||||||
|
|
||||||
|
spec = HydrusRequestSpec(method="GET", endpoint="/add_url/get_url_files", query={"url": pattern})
|
||||||
|
response = client._perform_request(spec) # type: ignore[attr-defined]
|
||||||
|
hashes: list[str] = []
|
||||||
|
file_ids: list[int] = []
|
||||||
|
if isinstance(response, dict):
|
||||||
|
raw_hashes = response.get("hashes") or response.get("file_hashes")
|
||||||
|
if isinstance(raw_hashes, list):
|
||||||
|
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
|
||||||
|
raw_ids = response.get("file_ids")
|
||||||
|
if isinstance(raw_ids, list):
|
||||||
|
for item in raw_ids:
|
||||||
|
try:
|
||||||
|
file_ids.append(int(item))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if file_ids:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
file_ids=file_ids,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
|
||||||
|
if isinstance(metas, list):
|
||||||
|
metadata_list = [m for m in metas if isinstance(m, dict)]
|
||||||
|
elif hashes:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
hashes=hashes,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
|
||||||
|
if isinstance(metas, list):
|
||||||
|
metadata_list = [m for m in metas if isinstance(m, dict)]
|
||||||
|
except Exception:
|
||||||
|
metadata_list = None
|
||||||
|
|
||||||
|
# Fallback: substring scan
|
||||||
|
if metadata_list is None:
|
||||||
|
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
|
||||||
|
|
||||||
# Parse the query into tags
|
# Parse the query into tags
|
||||||
# Handle both simple tags and complex queries
|
# Handle both simple tags and complex queries
|
||||||
# "*" means "match all" - use system:everything tag in Hydrus
|
# "*" means "match all" - use system:everything tag in Hydrus
|
||||||
@@ -271,7 +435,6 @@ class HydrusNetwork(Store):
|
|||||||
# Use system:everything to match all files in Hydrus
|
# Use system:everything to match all files in Hydrus
|
||||||
tags = ["system:everything"]
|
tags = ["system:everything"]
|
||||||
else:
|
else:
|
||||||
query_lower = query.lower().strip()
|
|
||||||
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
|
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
|
||||||
# If query has explicit namespace, use it as a tag search
|
# If query has explicit namespace, use it as a tag search
|
||||||
if ':' not in query_lower:
|
if ':' not in query_lower:
|
||||||
@@ -286,30 +449,36 @@ class HydrusNetwork(Store):
|
|||||||
debug(f"Found 0 result(s)")
|
debug(f"Found 0 result(s)")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Search files with the tags
|
# Search files with the tags (unless url: search already produced metadata)
|
||||||
|
results = []
|
||||||
|
# Split by comma or space for AND logic
|
||||||
|
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
|
||||||
|
|
||||||
|
if metadata_list is None:
|
||||||
search_result = client.search_files(
|
search_result = client.search_files(
|
||||||
tags=tags,
|
tags=tags,
|
||||||
return_hashes=True,
|
return_hashes=True,
|
||||||
return_file_ids=True
|
return_file_ids=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract file IDs from search result
|
file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
|
||||||
file_ids = search_result.get("file_ids", [])
|
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
|
||||||
hashes = search_result.get("hashes", [])
|
|
||||||
|
|
||||||
if not file_ids and not hashes:
|
if not file_ids and not hashes:
|
||||||
debug(f"Found 0 result(s)")
|
debug(f"Found 0 result(s)")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Fetch metadata for the found files
|
|
||||||
results = []
|
|
||||||
query_lower = query.lower().strip()
|
|
||||||
# Split by comma or space for AND logic
|
|
||||||
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
|
|
||||||
|
|
||||||
if file_ids:
|
if file_ids:
|
||||||
metadata = client.fetch_file_metadata(file_ids=file_ids)
|
metadata = client.fetch_file_metadata(file_ids=file_ids)
|
||||||
metadata_list = metadata.get("metadata", [])
|
metadata_list = metadata.get("metadata", [])
|
||||||
|
elif hashes:
|
||||||
|
metadata = client.fetch_file_metadata(hashes=hashes)
|
||||||
|
metadata_list = metadata.get("metadata", [])
|
||||||
|
else:
|
||||||
|
metadata_list = []
|
||||||
|
|
||||||
|
if not isinstance(metadata_list, list):
|
||||||
|
metadata_list = []
|
||||||
|
|
||||||
for meta in metadata_list:
|
for meta in metadata_list:
|
||||||
if len(results) >= limit:
|
if len(results) >= limit:
|
||||||
|
|||||||
@@ -119,6 +119,37 @@ class Store:
|
|||||||
self._backend_errors: Dict[str, str] = {}
|
self._backend_errors: Dict[str, str] = {}
|
||||||
self._load_backends()
|
self._load_backends()
|
||||||
|
|
||||||
|
def _maybe_register_temp_alias(self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore) -> None:
|
||||||
|
"""If a folder backend points at config['temp'], also expose it as the 'temp' backend.
|
||||||
|
|
||||||
|
This keeps config compatibility (e.g. existing 'default') while presenting the temp
|
||||||
|
directory under a clearer name.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if _normalize_store_type(store_type) != "folder":
|
||||||
|
return
|
||||||
|
temp_value = self._config.get("temp")
|
||||||
|
if not temp_value:
|
||||||
|
return
|
||||||
|
path_value = kwargs.get("PATH") or kwargs.get("path")
|
||||||
|
if not path_value:
|
||||||
|
return
|
||||||
|
|
||||||
|
temp_path = Path(str(temp_value)).expanduser().resolve()
|
||||||
|
backend_path = Path(str(path_value)).expanduser().resolve()
|
||||||
|
if backend_path != temp_path:
|
||||||
|
return
|
||||||
|
|
||||||
|
# If the user already has a dedicated temp backend, do nothing.
|
||||||
|
if "temp" in self._backends:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Keep original name working, but add an alias.
|
||||||
|
if backend_name != "temp":
|
||||||
|
self._backends["temp"] = backend
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
def _load_backends(self) -> None:
|
def _load_backends(self) -> None:
|
||||||
store_cfg = self._config.get("store")
|
store_cfg = self._config.get("store")
|
||||||
if not isinstance(store_cfg, dict):
|
if not isinstance(store_cfg, dict):
|
||||||
@@ -161,6 +192,9 @@ class Store:
|
|||||||
|
|
||||||
backend_name = str(kwargs.get("NAME") or instance_name)
|
backend_name = str(kwargs.get("NAME") or instance_name)
|
||||||
self._backends[backend_name] = backend
|
self._backends[backend_name] = backend
|
||||||
|
|
||||||
|
# If this is the configured temp directory, also alias it as 'temp'.
|
||||||
|
self._maybe_register_temp_alias(store_type, backend_name, kwargs, backend)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
err_text = str(exc)
|
err_text = str(exc)
|
||||||
self._backend_errors[str(instance_name)] = err_text
|
self._backend_errors[str(instance_name)] = err_text
|
||||||
@@ -177,11 +211,24 @@ class Store:
|
|||||||
return sorted(self._backends.keys())
|
return sorted(self._backends.keys())
|
||||||
|
|
||||||
def list_searchable_backends(self) -> list[str]:
|
def list_searchable_backends(self) -> list[str]:
|
||||||
searchable: list[str] = []
|
# De-duplicate backends by instance (aliases can point at the same object).
|
||||||
|
def _rank(name: str) -> int:
|
||||||
|
n = str(name or "").strip().lower()
|
||||||
|
if n == "temp":
|
||||||
|
return 0
|
||||||
|
if n == "default":
|
||||||
|
return 2
|
||||||
|
return 1
|
||||||
|
|
||||||
|
chosen: Dict[int, str] = {}
|
||||||
for name, backend in self._backends.items():
|
for name, backend in self._backends.items():
|
||||||
if type(backend).search is not BaseStore.search:
|
if type(backend).search is BaseStore.search:
|
||||||
searchable.append(name)
|
continue
|
||||||
return sorted(searchable)
|
key = id(backend)
|
||||||
|
prev = chosen.get(key)
|
||||||
|
if prev is None or _rank(name) < _rank(prev):
|
||||||
|
chosen[key] = name
|
||||||
|
return sorted(chosen.values())
|
||||||
|
|
||||||
def __getitem__(self, backend_name: str) -> BaseStore:
|
def __getitem__(self, backend_name: str) -> BaseStore:
|
||||||
if backend_name not in self._backends:
|
if backend_name not in self._backends:
|
||||||
|
|||||||
@@ -5,10 +5,9 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import inspect
|
|
||||||
from collections.abc import Iterable as IterableABC
|
from collections.abc import Iterable as IterableABC
|
||||||
|
|
||||||
from SYS.logger import log, debug
|
from SYS.logger import log
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
|
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -690,7 +689,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
|||||||
get_field(result, "table", "unknown") # With default
|
get_field(result, "table", "unknown") # With default
|
||||||
"""
|
"""
|
||||||
# Handle lists by accessing the first element
|
# Handle lists by accessing the first element
|
||||||
if isinstance(obj, list) and obj:
|
if isinstance(obj, list):
|
||||||
|
if not obj:
|
||||||
|
return default
|
||||||
obj = obj[0]
|
obj = obj[0]
|
||||||
|
|
||||||
if isinstance(obj, dict):
|
if isinstance(obj, dict):
|
||||||
@@ -702,8 +703,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
# For PipeObjects, also check the extra field
|
# For PipeObjects, also check the extra field
|
||||||
if hasattr(obj, 'extra') and isinstance(obj.extra, dict):
|
extra_val = getattr(obj, 'extra', None)
|
||||||
return obj.extra.get(field, default)
|
if isinstance(extra_val, dict):
|
||||||
|
return extra_val.get(field, default)
|
||||||
|
|
||||||
return default
|
return default
|
||||||
|
|
||||||
@@ -1118,7 +1120,7 @@ def create_pipe_object_result(
|
|||||||
Returns:
|
Returns:
|
||||||
Dict with all PipeObject fields for emission
|
Dict with all PipeObject fields for emission
|
||||||
"""
|
"""
|
||||||
result = {
|
result: Dict[str, Any] = {
|
||||||
'source': source,
|
'source': source,
|
||||||
'id': identifier,
|
'id': identifier,
|
||||||
'path': file_path,
|
'path': file_path,
|
||||||
@@ -1546,14 +1548,11 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
|||||||
extra = {k: v for k, v in value.items() if k not in known_keys}
|
extra = {k: v for k, v in value.items() if k not in known_keys}
|
||||||
|
|
||||||
# Extract URL: prefer direct url field, then url list
|
# Extract URL: prefer direct url field, then url list
|
||||||
url_val = value.get("url")
|
from metadata import normalize_urls
|
||||||
if not url_val:
|
url_list = normalize_urls(value.get("url"))
|
||||||
url = value.get("url") or value.get("url") or []
|
url_val = url_list[0] if url_list else None
|
||||||
if url and isinstance(url, list) and len(url) > 0:
|
if len(url_list) > 1:
|
||||||
url_val = url[0]
|
extra["url"] = url_list
|
||||||
# Preserve url in extra if multiple url exist
|
|
||||||
if url and len(url) > 1:
|
|
||||||
extra["url"] = url
|
|
||||||
|
|
||||||
# Extract relationships
|
# Extract relationships
|
||||||
rels = value.get("relationships") or {}
|
rels = value.get("relationships") or {}
|
||||||
|
|||||||
@@ -1,14 +1,16 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, Dict, Optional, Sequence, Tuple, List, Union
|
from typing import Any, Dict, Optional, Sequence, Tuple, List
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
|
||||||
import models
|
import models
|
||||||
import pipeline as ctx
|
import pipeline as ctx
|
||||||
from API import HydrusNetwork as hydrus_wrapper
|
from API import HydrusNetwork as hydrus_wrapper
|
||||||
from SYS.logger import log, debug
|
from SYS.logger import log, debug
|
||||||
|
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
|
||||||
from Store import Store
|
from Store import Store
|
||||||
from ._shared import (
|
from ._shared import (
|
||||||
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
|
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
|
||||||
@@ -20,8 +22,8 @@ from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_sto
|
|||||||
from SYS.utils import sha256_file, unique_path
|
from SYS.utils import sha256_file, unique_path
|
||||||
from metadata import write_metadata
|
from metadata import write_metadata
|
||||||
|
|
||||||
# Use official Hydrus supported filetypes from hydrus_wrapper
|
# Canonical supported filetypes for all stores/cmdlets
|
||||||
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
|
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
|
||||||
|
|
||||||
class Add_File(Cmdlet):
|
class Add_File(Cmdlet):
|
||||||
"""Add file into the DB"""
|
"""Add file into the DB"""
|
||||||
@@ -53,92 +55,209 @@ class Add_File(Cmdlet):
|
|||||||
|
|
||||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
"""Main execution entry point."""
|
"""Main execution entry point."""
|
||||||
# Parse arguments
|
|
||||||
parsed = parse_cmdlet_args(args, self)
|
parsed = parse_cmdlet_args(args, self)
|
||||||
|
|
||||||
# Initialize state
|
|
||||||
path_arg = parsed.get("path")
|
path_arg = parsed.get("path")
|
||||||
location = parsed.get("store") # Fixed: was "storage", should be "store"
|
location = parsed.get("store")
|
||||||
provider_name = parsed.get("provider")
|
provider_name = parsed.get("provider")
|
||||||
delete_after = parsed.get("delete", False)
|
delete_after = parsed.get("delete", False)
|
||||||
|
|
||||||
# Coerce result to PipeObject; if result is a list, prefer the first element
|
stage_ctx = ctx.get_stage_context()
|
||||||
effective_result = result
|
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
|
||||||
if isinstance(result, list) and result:
|
|
||||||
first_item = result[0]
|
# Decide which items to process.
|
||||||
# Prefer first item if it's a dict or PipeObject
|
# - If user provided -path, treat this invocation as single-item.
|
||||||
if isinstance(first_item, (dict, )):
|
# - Otherwise, if piped input is a list, ingest each item.
|
||||||
effective_result = first_item
|
if path_arg:
|
||||||
pipe_obj = coerce_to_pipe_object(effective_result, path_arg)
|
items_to_process: List[Any] = [result]
|
||||||
|
elif isinstance(result, list) and result:
|
||||||
|
items_to_process = list(result)
|
||||||
|
else:
|
||||||
|
items_to_process = [result]
|
||||||
|
|
||||||
# Debug: Log input result details
|
|
||||||
debug(f"[add-file] INPUT result type={type(result).__name__}")
|
debug(f"[add-file] INPUT result type={type(result).__name__}")
|
||||||
if isinstance(result, list):
|
if isinstance(result, list):
|
||||||
debug(f"[add-file] INPUT result is list with {len(result)} items")
|
debug(f"[add-file] INPUT result is list with {len(result)} items")
|
||||||
if result and isinstance(result[0], dict):
|
|
||||||
first = result[0]
|
|
||||||
hash_val = first.get('hash')
|
|
||||||
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
|
|
||||||
debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}")
|
|
||||||
elif isinstance(result, dict):
|
|
||||||
hash_val = result.get('hash')
|
|
||||||
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
|
|
||||||
debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}")
|
|
||||||
|
|
||||||
# Debug: Log parsed arguments
|
|
||||||
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")
|
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")
|
||||||
|
|
||||||
# Resolve source - returns (media_path_or_url, file_hash)
|
collected_payloads: List[Dict[str, Any]] = []
|
||||||
media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config)
|
successes = 0
|
||||||
|
failures = 0
|
||||||
|
|
||||||
|
# Only run the search-store refresh when add-file is the last stage.
|
||||||
|
# In the middle of a pipeline, downstream cmdlets should receive the emitted
|
||||||
|
# storage payload directly (no need to re-search and risk duplicate emits).
|
||||||
|
auto_search_store_after_add = bool(is_last_stage) and len(items_to_process) == 1
|
||||||
|
|
||||||
|
for item in items_to_process:
|
||||||
|
pipe_obj = coerce_to_pipe_object(item, path_arg)
|
||||||
|
|
||||||
|
temp_dir_to_cleanup: Optional[Path] = None
|
||||||
|
delete_after_item = delete_after
|
||||||
|
try:
|
||||||
|
media_path_or_url, file_hash = self._resolve_source(item, path_arg, pipe_obj, config)
|
||||||
debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
|
debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
|
||||||
if not media_path_or_url:
|
if not media_path_or_url:
|
||||||
debug(f"[add-file] ERROR: Could not resolve source file/URL")
|
failures += 1
|
||||||
return 1
|
continue
|
||||||
|
|
||||||
# Update pipe_obj with resolved path
|
# Update pipe_obj with resolved path
|
||||||
pipe_obj.path = str(media_path_or_url) if isinstance(media_path_or_url, (str, Path)) else str(media_path_or_url)
|
pipe_obj.path = str(media_path_or_url)
|
||||||
|
|
||||||
# Check if it's a URL before validating as file
|
# URL targets: prefer provider-aware download for OpenLibrary selections.
|
||||||
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
|
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
|
||||||
debug(f"Detected URL target, delegating to download-data: {media_path_or_url}")
|
("http://", "https://", "magnet:", "torrent:")
|
||||||
return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config)
|
):
|
||||||
|
table = None
|
||||||
|
full_metadata = None
|
||||||
|
if isinstance(pipe_obj.extra, dict):
|
||||||
|
table = pipe_obj.extra.get("table")
|
||||||
|
full_metadata = pipe_obj.extra.get("full_metadata")
|
||||||
|
|
||||||
|
is_openlibrary = (str(table or "").lower() == "openlibrary") or ("openlibrary.org/books/" in media_path_or_url.lower())
|
||||||
|
if is_openlibrary:
|
||||||
|
# Enrich tags from OpenLibrary metadata so the stored file has book tags (author/pages/etc).
|
||||||
|
try:
|
||||||
|
from Provider.openlibrary import OpenLibrary as _OpenLibrary
|
||||||
|
|
||||||
|
olid = None
|
||||||
|
archive_id = None
|
||||||
|
if isinstance(full_metadata, dict):
|
||||||
|
olid = full_metadata.get("openlibrary_id") or full_metadata.get("openlibrary")
|
||||||
|
archive_id = full_metadata.get("archive_id")
|
||||||
|
|
||||||
|
if not olid:
|
||||||
|
import re
|
||||||
|
m = re.search(r"/books/(OL\d+M)", str(media_path_or_url), flags=re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
olid = m.group(1)
|
||||||
|
|
||||||
|
scraped_tags: List[str] = []
|
||||||
|
if olid:
|
||||||
|
scraped_tags.extend(_OpenLibrary.scrape_openlibrary_metadata(str(olid)) or [])
|
||||||
|
if archive_id:
|
||||||
|
scraped_tags.append(f"internet_archive:{archive_id}")
|
||||||
|
|
||||||
|
if scraped_tags:
|
||||||
|
existing = list(pipe_obj.tag or [])
|
||||||
|
pipe_obj.tag = merge_sequences(existing, scraped_tags, case_sensitive=False)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
from ProviderCore.registry import get_search_provider
|
||||||
|
from ProviderCore.base import SearchResult
|
||||||
|
|
||||||
|
provider = get_search_provider("openlibrary", config)
|
||||||
|
if provider is None:
|
||||||
|
log("[add-file] OpenLibrary provider not available", file=sys.stderr)
|
||||||
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
|
||||||
|
sr = SearchResult(
|
||||||
|
table="openlibrary",
|
||||||
|
title=str(getattr(pipe_obj, "title", None) or "Unknown"),
|
||||||
|
path=str(media_path_or_url),
|
||||||
|
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
|
||||||
|
)
|
||||||
|
downloaded = provider.download(sr, temp_dir_to_cleanup)
|
||||||
|
if downloaded is None:
|
||||||
|
log("[add-file] OpenLibrary download failed", file=sys.stderr)
|
||||||
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
downloaded_path = Path(downloaded)
|
||||||
|
if downloaded_path.exists() and downloaded_path.is_dir():
|
||||||
|
log(
|
||||||
|
"[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
media_path_or_url = str(downloaded_path)
|
||||||
|
pipe_obj.path = str(downloaded_path)
|
||||||
|
delete_after_item = True
|
||||||
|
|
||||||
|
# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
|
||||||
|
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
|
||||||
|
("http://", "https://", "magnet:", "torrent:")
|
||||||
|
):
|
||||||
|
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
|
||||||
|
if code == 0:
|
||||||
|
successes += 1
|
||||||
|
else:
|
||||||
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
# Convert to Path and validate
|
|
||||||
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
|
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
|
||||||
|
|
||||||
# Validate source
|
|
||||||
if not self._validate_source(media_path):
|
if not self._validate_source(media_path):
|
||||||
debug(f"[add-file] ERROR: Source validation failed for {media_path}")
|
failures += 1
|
||||||
return 1
|
continue
|
||||||
|
|
||||||
# Debug: Log execution path decision
|
|
||||||
debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}")
|
|
||||||
debug(f" media_path={media_path}, exists={media_path.exists()}")
|
|
||||||
|
|
||||||
# Execute transfer based on destination (using Store registry)
|
|
||||||
if provider_name:
|
if provider_name:
|
||||||
debug(f"[add-file] ROUTE: file provider upload")
|
code = self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after_item)
|
||||||
return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after)
|
if code == 0:
|
||||||
elif location:
|
successes += 1
|
||||||
# Check if location is a registered backend name
|
else:
|
||||||
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if location:
|
||||||
try:
|
try:
|
||||||
store = Store(config)
|
store = Store(config)
|
||||||
backends = store.list_backends()
|
backends = store.list_backends()
|
||||||
|
|
||||||
if location in backends:
|
if location in backends:
|
||||||
debug(f"[add-file] ROUTE: storage backend '{location}'")
|
code = self._handle_storage_backend(
|
||||||
return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after)
|
item,
|
||||||
|
media_path,
|
||||||
|
location,
|
||||||
|
pipe_obj,
|
||||||
|
config,
|
||||||
|
delete_after_item,
|
||||||
|
collect_payloads=collected_payloads,
|
||||||
|
suppress_last_stage_overlay=is_last_stage and len(items_to_process) > 1,
|
||||||
|
auto_search_store=auto_search_store_after_add,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Treat as local export path
|
code = self._handle_local_export(media_path, location, pipe_obj, config, delete_after_item)
|
||||||
debug(f"[add-file] ROUTE: local export to path '{location}'")
|
|
||||||
return self._handle_local_export(media_path, location, pipe_obj, config, delete_after)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
|
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
|
||||||
log(f"Invalid location: {location}", file=sys.stderr)
|
log(f"Invalid location: {location}", file=sys.stderr)
|
||||||
return 1
|
failures += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if code == 0:
|
||||||
|
successes += 1
|
||||||
else:
|
else:
|
||||||
debug(f"[add-file] ERROR: No location or provider specified")
|
failures += 1
|
||||||
log(f"No storage location or provider specified", file=sys.stderr)
|
continue
|
||||||
|
|
||||||
|
log("No destination specified", file=sys.stderr)
|
||||||
|
failures += 1
|
||||||
|
finally:
|
||||||
|
if temp_dir_to_cleanup is not None:
|
||||||
|
try:
|
||||||
|
shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If we processed multiple storage ingests, present a single consolidated overlay table.
|
||||||
|
if is_last_stage and len(items_to_process) > 1 and collected_payloads:
|
||||||
|
try:
|
||||||
|
from result_table import ResultTable
|
||||||
|
|
||||||
|
table = ResultTable("Result")
|
||||||
|
for payload in collected_payloads:
|
||||||
|
table.add_result(payload)
|
||||||
|
# Make this the active selectable table so @.. returns here (and playlist table is kept in history).
|
||||||
|
ctx.set_last_result_table(table, collected_payloads, subject=collected_payloads)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if successes > 0:
|
||||||
|
return 0
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -150,9 +269,6 @@ class Add_File(Cmdlet):
|
|||||||
) -> Tuple[Optional[Path | str], Optional[str]]:
|
) -> Tuple[Optional[Path | str], Optional[str]]:
|
||||||
"""Resolve the source file path from args or pipeline result.
|
"""Resolve the source file path from args or pipeline result.
|
||||||
|
|
||||||
PRIORITY: hash+store pattern is preferred over path-based resolution.
|
|
||||||
This ensures consistency when @N selections pass hash+store identifiers.
|
|
||||||
|
|
||||||
Returns (media_path_or_url, file_hash)
|
Returns (media_path_or_url, file_hash)
|
||||||
where media_path_or_url can be a Path object or a URL string.
|
where media_path_or_url can be a Path object or a URL string.
|
||||||
"""
|
"""
|
||||||
@@ -161,8 +277,9 @@ class Add_File(Cmdlet):
|
|||||||
result_hash = result.get("hash")
|
result_hash = result.get("hash")
|
||||||
result_store = result.get("store")
|
result_store = result.get("store")
|
||||||
if result_hash and result_store:
|
if result_hash and result_store:
|
||||||
debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}")
|
debug(
|
||||||
# Use get_file to retrieve from the specific store
|
f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
store = Store(config)
|
store = Store(config)
|
||||||
if result_store in store.list_backends():
|
if result_store in store.list_backends():
|
||||||
@@ -170,13 +287,12 @@ class Add_File(Cmdlet):
|
|||||||
media_path = backend.get_file(result_hash)
|
media_path = backend.get_file(result_hash)
|
||||||
if isinstance(media_path, Path) and media_path.exists():
|
if isinstance(media_path, Path) and media_path.exists():
|
||||||
pipe_obj.path = str(media_path)
|
pipe_obj.path = str(media_path)
|
||||||
debug(f"[add-file] Retrieved file from {result_store}: {media_path}")
|
return media_path, str(result_hash)
|
||||||
return media_path, result_hash
|
if isinstance(media_path, str) and media_path.lower().startswith(
|
||||||
|
("http://", "https://", "magnet:", "torrent:")
|
||||||
if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")):
|
):
|
||||||
pipe_obj.path = media_path
|
pipe_obj.path = media_path
|
||||||
debug(f"[add-file] Retrieved URL from {result_store}: {media_path}")
|
return media_path, str(result_hash)
|
||||||
return media_path, result_hash
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")
|
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")
|
||||||
|
|
||||||
@@ -196,10 +312,9 @@ class Add_File(Cmdlet):
|
|||||||
file_hash = pipe_path_str.split(":", 1)[1]
|
file_hash = pipe_path_str.split(":", 1)[1]
|
||||||
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
|
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
|
||||||
return media_path, file_hash if success else None
|
return media_path, file_hash if success else None
|
||||||
# Check if pipe_path is a URL - skip to URL handling below
|
if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
|
||||||
if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
|
return pipe_path_str, None
|
||||||
media_path = Path(pipe_path_str)
|
return Path(pipe_path_str), None
|
||||||
return media_path, None
|
|
||||||
|
|
||||||
# PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
|
# PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
|
||||||
pipe_url = getattr(pipe_obj, "url", None)
|
pipe_url = getattr(pipe_obj, "url", None)
|
||||||
@@ -248,8 +363,9 @@ class Add_File(Cmdlet):
|
|||||||
# Look for path or path-like keys
|
# Look for path or path-like keys
|
||||||
path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
|
path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
|
||||||
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
|
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
|
||||||
if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"):
|
paths_val = first_item.get("paths")
|
||||||
path_candidate = first_item.get("paths")[0]
|
if not path_candidate and isinstance(paths_val, (list, tuple)) and paths_val:
|
||||||
|
path_candidate = paths_val[0]
|
||||||
if path_candidate:
|
if path_candidate:
|
||||||
debug(f"Resolved path from result dict: {path_candidate}")
|
debug(f"Resolved path from result dict: {path_candidate}")
|
||||||
try:
|
try:
|
||||||
@@ -361,8 +477,10 @@ class Add_File(Cmdlet):
|
|||||||
selection_args = result["_selection_args"]
|
selection_args = result["_selection_args"]
|
||||||
if selection_args:
|
if selection_args:
|
||||||
dl_args.extend(selection_args)
|
dl_args.extend(selection_args)
|
||||||
elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra:
|
else:
|
||||||
selection_args = result.extra["_selection_args"]
|
extra_val = getattr(result, "extra", None)
|
||||||
|
if isinstance(extra_val, dict) and "_selection_args" in extra_val:
|
||||||
|
selection_args = extra_val["_selection_args"]
|
||||||
if selection_args:
|
if selection_args:
|
||||||
dl_args.extend(selection_args)
|
dl_args.extend(selection_args)
|
||||||
|
|
||||||
@@ -375,18 +493,32 @@ class Add_File(Cmdlet):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
|
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
|
||||||
url: List[str] = []
|
from metadata import normalize_urls
|
||||||
|
|
||||||
|
# Prefer explicit PipeObject.url if present
|
||||||
|
urls: List[str] = []
|
||||||
|
try:
|
||||||
|
urls = normalize_urls(getattr(pipe_obj, "url", None))
|
||||||
|
except Exception:
|
||||||
|
urls = []
|
||||||
|
|
||||||
|
# Then check extra.url
|
||||||
|
if not urls:
|
||||||
try:
|
try:
|
||||||
if isinstance(pipe_obj.extra, dict):
|
if isinstance(pipe_obj.extra, dict):
|
||||||
url = list(pipe_obj.extra.get("url") or pipe_obj.extra.get("url") or [])
|
urls = normalize_urls(pipe_obj.extra.get("url"))
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if not url and isinstance(result, dict):
|
# Then check result dict
|
||||||
url = list(result.get("url") or result.get("url") or [])
|
if not urls and isinstance(result, dict):
|
||||||
if not url:
|
urls = normalize_urls(result.get("url"))
|
||||||
url = list(extract_url_from_result(result) or [])
|
|
||||||
return url
|
# Finally, try extractor helper
|
||||||
|
if not urls:
|
||||||
|
urls = normalize_urls(extract_url_from_result(result))
|
||||||
|
|
||||||
|
return urls
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
|
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
|
||||||
@@ -405,10 +537,36 @@ class Add_File(Cmdlet):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
|
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
|
||||||
if getattr(pipe_obj, "duration", None) is not None:
|
def _parse_duration(value: Any) -> Optional[float]:
|
||||||
return pipe_obj.duration
|
if value is None:
|
||||||
|
return None
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
return float(value) if value > 0 else None
|
||||||
|
if isinstance(value, str):
|
||||||
|
s = value.strip()
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
return extract_duration(result)
|
candidate = float(s)
|
||||||
|
return candidate if candidate > 0 else None
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
if ":" in s:
|
||||||
|
parts = [p.strip() for p in s.split(":") if p.strip()]
|
||||||
|
if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
|
||||||
|
nums = [int(p) for p in parts]
|
||||||
|
if len(nums) == 2:
|
||||||
|
minutes, seconds = nums
|
||||||
|
return float(minutes * 60 + seconds)
|
||||||
|
hours, minutes, seconds = nums
|
||||||
|
return float(hours * 3600 + minutes * 60 + seconds)
|
||||||
|
return None
|
||||||
|
|
||||||
|
parsed = _parse_duration(getattr(pipe_obj, "duration", None))
|
||||||
|
if parsed is not None:
|
||||||
|
return parsed
|
||||||
|
try:
|
||||||
|
return _parse_duration(extract_duration(result))
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -442,19 +600,20 @@ class Add_File(Cmdlet):
|
|||||||
ctx.set_current_stage_table(None)
|
ctx.set_current_stage_table(None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _emit_storage_result(payload: Dict[str, Any]) -> None:
|
def _emit_storage_result(payload: Dict[str, Any], *, overlay: bool = True, emit: bool = True) -> None:
|
||||||
"""Emit a storage-style result payload.
|
"""Emit a storage-style result payload.
|
||||||
|
|
||||||
- Always emits the dict downstream (when in a pipeline).
|
- Always emits the dict downstream (when in a pipeline).
|
||||||
- If this is the last stage (or not in a pipeline), prints a search-store-like table
|
- If this is the last stage (or not in a pipeline), prints a search-store-like table
|
||||||
and sets an overlay table/items for @N selection.
|
and sets an overlay table/items for @N selection.
|
||||||
"""
|
"""
|
||||||
# Always emit for downstream commands (no-op if not in a pipeline)
|
# Emit for downstream commands (no-op if not in a pipeline)
|
||||||
|
if emit:
|
||||||
ctx.emit(payload)
|
ctx.emit(payload)
|
||||||
|
|
||||||
stage_ctx = ctx.get_stage_context()
|
stage_ctx = ctx.get_stage_context()
|
||||||
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
|
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
|
||||||
if not is_last:
|
if not is_last or not overlay:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -470,6 +629,53 @@ class Add_File(Cmdlet):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> bool:
|
||||||
|
"""Run search-store for a single hash so the final table/payload is consistent.
|
||||||
|
|
||||||
|
Important: `add-file` is treated as an action command by the CLI, so the CLI only
|
||||||
|
prints tables for it when a display overlay exists. After running search-store,
|
||||||
|
this copies the resulting table into the display overlay (when this is the last
|
||||||
|
stage) so the canonical store table is what the user sees and can select from.
|
||||||
|
|
||||||
|
Returns True if search-store ran successfully, else False.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from cmdlet.search_store import CMDLET as search_store_cmdlet
|
||||||
|
|
||||||
|
args = ["-store", str(store), f"hash:{str(hash_value)}"]
|
||||||
|
log(f"[add-file] Refresh: search-store -store {store} \"hash:{hash_value}\"", file=sys.stderr)
|
||||||
|
|
||||||
|
# Run search-store under a temporary stage context so its ctx.emit() calls
|
||||||
|
# don't interfere with the outer add-file pipeline stage.
|
||||||
|
prev_ctx = ctx.get_stage_context()
|
||||||
|
temp_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=getattr(prev_ctx, "worker_id", None))
|
||||||
|
ctx.set_stage_context(temp_ctx)
|
||||||
|
try:
|
||||||
|
code = search_store_cmdlet.run(None, args, config)
|
||||||
|
finally:
|
||||||
|
ctx.set_stage_context(prev_ctx)
|
||||||
|
if code != 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Promote the search-store result to a display overlay so the CLI prints it
|
||||||
|
# for action commands like add-file.
|
||||||
|
stage_ctx = ctx.get_stage_context()
|
||||||
|
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
|
||||||
|
if is_last:
|
||||||
|
try:
|
||||||
|
table = ctx.get_last_result_table()
|
||||||
|
items = ctx.get_last_result_items()
|
||||||
|
if table is not None and items:
|
||||||
|
ctx.set_last_result_table_overlay(table, items, subject={"store": store, "hash": hash_value})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as exc:
|
||||||
|
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _prepare_metadata(
|
def _prepare_metadata(
|
||||||
result: Any,
|
result: Any,
|
||||||
@@ -664,8 +870,9 @@ class Add_File(Cmdlet):
|
|||||||
|
|
||||||
if not username or not filename:
|
if not username or not filename:
|
||||||
debug(f"[add-file] ERROR: Could not extract soulseek metadata from result (type={type(result).__name__})")
|
debug(f"[add-file] ERROR: Could not extract soulseek metadata from result (type={type(result).__name__})")
|
||||||
if hasattr(result, "extra"):
|
extra_val = getattr(result, "extra", None)
|
||||||
debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
|
if isinstance(extra_val, dict):
|
||||||
|
debug(f"[add-file] Result extra keys: {list(extra_val.keys())}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not username or not filename:
|
if not username or not filename:
|
||||||
@@ -769,28 +976,55 @@ class Add_File(Cmdlet):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _handle_storage_backend(
|
def _handle_storage_backend(
|
||||||
|
result: Any,
|
||||||
media_path: Path,
|
media_path: Path,
|
||||||
backend_name: str,
|
backend_name: str,
|
||||||
pipe_obj: models.PipeObject,
|
pipe_obj: models.PipeObject,
|
||||||
config: Dict[str, Any],
|
config: Dict[str, Any],
|
||||||
delete_after: bool,
|
delete_after: bool,
|
||||||
|
*,
|
||||||
|
collect_payloads: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
suppress_last_stage_overlay: bool = False,
|
||||||
|
auto_search_store: bool = True,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
|
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
|
||||||
log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
|
log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
|
||||||
|
|
||||||
|
delete_after_effective = bool(delete_after)
|
||||||
|
if not delete_after_effective:
|
||||||
|
# When download-media is piped into add-file, the downloaded artifact is a temp file.
|
||||||
|
# After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
|
||||||
|
try:
|
||||||
|
if (
|
||||||
|
str(backend_name or "").strip().lower() != "temp"
|
||||||
|
and getattr(pipe_obj, "is_temp", False)
|
||||||
|
and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
|
||||||
|
):
|
||||||
|
from config import resolve_output_dir
|
||||||
|
temp_dir = resolve_output_dir(config)
|
||||||
|
try:
|
||||||
|
if media_path.resolve().is_relative_to(temp_dir.expanduser().resolve()):
|
||||||
|
delete_after_effective = True
|
||||||
|
debug(f"[add-file] Auto-delete temp source after ingest: {media_path}")
|
||||||
|
except Exception:
|
||||||
|
# If path resolution fails, fall back to non-destructive behavior
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
store = Store(config)
|
store = Store(config)
|
||||||
backend = store[backend_name]
|
backend = store[backend_name]
|
||||||
|
|
||||||
# Prepare metadata from pipe_obj and sidecars
|
# Prepare metadata from pipe_obj and sidecars
|
||||||
tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config)
|
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
|
||||||
|
|
||||||
# Call backend's add_file with full metadata
|
# Call backend's add_file with full metadata
|
||||||
# Backend returns hash as identifier
|
# Backend returns hash as identifier
|
||||||
file_identifier = backend.add_file(
|
file_identifier = backend.add_file(
|
||||||
media_path,
|
media_path,
|
||||||
title=title,
|
title=title,
|
||||||
tags=tags,
|
tag=tags,
|
||||||
url=url
|
url=url
|
||||||
)
|
)
|
||||||
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
|
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
|
||||||
@@ -822,6 +1056,14 @@ class Add_File(Cmdlet):
|
|||||||
# Keep hash/store for downstream commands (get-tag, get-file, etc.).
|
# Keep hash/store for downstream commands (get-tag, get-file, etc.).
|
||||||
resolved_hash = file_identifier if len(file_identifier) == 64 else (f_hash or file_identifier or "unknown")
|
resolved_hash = file_identifier if len(file_identifier) == 64 else (f_hash or file_identifier or "unknown")
|
||||||
|
|
||||||
|
# If we have url(s), ensure they get associated with the destination file.
|
||||||
|
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
|
||||||
|
if url:
|
||||||
|
try:
|
||||||
|
backend.add_url(resolved_hash, list(url))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
meta: Dict[str, Any] = {}
|
meta: Dict[str, Any] = {}
|
||||||
try:
|
try:
|
||||||
meta = backend.get_metadata(resolved_hash) or {}
|
meta = backend.get_metadata(resolved_hash) or {}
|
||||||
@@ -865,9 +1107,30 @@ class Add_File(Cmdlet):
|
|||||||
"tag": list(tags or []),
|
"tag": list(tags or []),
|
||||||
"url": list(url or []),
|
"url": list(url or []),
|
||||||
}
|
}
|
||||||
Add_File._emit_storage_result(payload)
|
if collect_payloads is not None:
|
||||||
|
try:
|
||||||
|
collect_payloads.append(payload)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
|
# Keep the add-file 1-row summary overlay (when last stage), then emit the
|
||||||
|
# canonical search-store payload/table for piping/selection consistency.
|
||||||
|
if auto_search_store and resolved_hash and resolved_hash != "unknown":
|
||||||
|
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
|
||||||
|
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False)
|
||||||
|
|
||||||
|
ok = Add_File._try_emit_search_store_by_hash(
|
||||||
|
store=backend_name,
|
||||||
|
hash_value=resolved_hash,
|
||||||
|
config=config,
|
||||||
|
)
|
||||||
|
if not ok:
|
||||||
|
# Fall back to emitting the add-file payload so downstream stages still receive an item.
|
||||||
|
ctx.emit(payload)
|
||||||
|
else:
|
||||||
|
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=True)
|
||||||
|
|
||||||
|
Add_File._cleanup_after_success(media_path, delete_source=delete_after_effective)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|||||||
from typing import Any, Dict, Sequence
|
from typing import Any, Dict, Sequence
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import register
|
|
||||||
import pipeline as ctx
|
import pipeline as ctx
|
||||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||||
from SYS.logger import log
|
from SYS.logger import log
|
||||||
@@ -13,18 +12,23 @@ from Store import Store
|
|||||||
class Add_Url(Cmdlet):
|
class Add_Url(Cmdlet):
|
||||||
"""Add URL associations to files via hash+store."""
|
"""Add URL associations to files via hash+store."""
|
||||||
|
|
||||||
NAME = "add-url"
|
def __init__(self) -> None:
|
||||||
SUMMARY = "Associate a URL with a file"
|
super().__init__(
|
||||||
USAGE = "@1 | add-url <url>"
|
name="add-url",
|
||||||
ARGS = [
|
summary="Associate a URL with a file",
|
||||||
|
usage="@1 | add-url <url>",
|
||||||
|
arg=[
|
||||||
SharedArgs.HASH,
|
SharedArgs.HASH,
|
||||||
SharedArgs.STORE,
|
SharedArgs.STORE,
|
||||||
CmdletArg("url", required=True, description="URL to associate"),
|
CmdletArg("url", required=True, description="URL to associate"),
|
||||||
]
|
],
|
||||||
DETAIL = [
|
detail=[
|
||||||
"- Associates URL with file identified by hash+store",
|
"- Associates URL with file identified by hash+store",
|
||||||
"- Multiple url can be comma-separated",
|
"- Multiple url can be comma-separated",
|
||||||
]
|
],
|
||||||
|
exec=self.run,
|
||||||
|
)
|
||||||
|
self.register()
|
||||||
|
|
||||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
"""Add URL to file via hash+store backend."""
|
"""Add URL to file via hash+store backend."""
|
||||||
@@ -78,8 +82,7 @@ class Add_Url(Cmdlet):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
# Register cmdlet
|
CMDLET = Add_Url()
|
||||||
register(["add-url", "add_url"])(Add_Url)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|||||||
from typing import Any, Dict, Sequence
|
from typing import Any, Dict, Sequence
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import register
|
|
||||||
import pipeline as ctx
|
import pipeline as ctx
|
||||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||||
from SYS.logger import log
|
from SYS.logger import log
|
||||||
@@ -13,18 +12,23 @@ from Store import Store
|
|||||||
class Delete_Url(Cmdlet):
|
class Delete_Url(Cmdlet):
|
||||||
"""Delete URL associations from files via hash+store."""
|
"""Delete URL associations from files via hash+store."""
|
||||||
|
|
||||||
NAME = "delete-url"
|
def __init__(self) -> None:
|
||||||
SUMMARY = "Remove a URL association from a file"
|
super().__init__(
|
||||||
USAGE = "@1 | delete-url <url>"
|
name="delete-url",
|
||||||
ARGS = [
|
summary="Remove a URL association from a file",
|
||||||
|
usage="@1 | delete-url <url>",
|
||||||
|
arg=[
|
||||||
SharedArgs.HASH,
|
SharedArgs.HASH,
|
||||||
SharedArgs.STORE,
|
SharedArgs.STORE,
|
||||||
CmdletArg("url", required=True, description="URL to remove"),
|
CmdletArg("url", required=True, description="URL to remove"),
|
||||||
]
|
],
|
||||||
DETAIL = [
|
detail=[
|
||||||
"- Removes URL association from file identified by hash+store",
|
"- Removes URL association from file identified by hash+store",
|
||||||
"- Multiple url can be comma-separated",
|
"- Multiple url can be comma-separated",
|
||||||
]
|
],
|
||||||
|
exec=self.run,
|
||||||
|
)
|
||||||
|
self.register()
|
||||||
|
|
||||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
"""Delete URL from file via hash+store backend."""
|
"""Delete URL from file via hash+store backend."""
|
||||||
@@ -78,5 +82,4 @@ class Delete_Url(Cmdlet):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
# Register cmdlet
|
CMDLET = Delete_Url()
|
||||||
register(["delete-url", "del-url", "delete_url"])(Delete_Url)
|
|
||||||
|
|||||||
@@ -190,9 +190,11 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
# If this looks like a provider item and providers are available, prefer provider.download()
|
# If this looks like a provider item and providers are available, prefer provider.download()
|
||||||
downloaded_path: Optional[Path] = None
|
downloaded_path: Optional[Path] = None
|
||||||
|
attempted_provider_download = False
|
||||||
if table and get_search_provider and SearchResult:
|
if table and get_search_provider and SearchResult:
|
||||||
provider = get_search_provider(str(table), config)
|
provider = get_search_provider(str(table), config)
|
||||||
if provider is not None:
|
if provider is not None:
|
||||||
|
attempted_provider_download = True
|
||||||
sr = SearchResult(
|
sr = SearchResult(
|
||||||
table=str(table),
|
table=str(table),
|
||||||
title=str(title or "Unknown"),
|
title=str(title or "Unknown"),
|
||||||
@@ -202,6 +204,19 @@ class Download_File(Cmdlet):
|
|||||||
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
|
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
|
||||||
downloaded_path = provider.download(sr, final_output_dir)
|
downloaded_path = provider.download(sr, final_output_dir)
|
||||||
|
|
||||||
|
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
|
||||||
|
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
|
||||||
|
availability = None
|
||||||
|
reason = None
|
||||||
|
if isinstance(full_metadata, dict):
|
||||||
|
availability = full_metadata.get("availability")
|
||||||
|
reason = full_metadata.get("availability_reason")
|
||||||
|
msg = "[download-file] OpenLibrary item not downloadable"
|
||||||
|
if availability or reason:
|
||||||
|
msg += f" (availability={availability or ''} reason={reason or ''})"
|
||||||
|
log(msg, file=sys.stderr)
|
||||||
|
continue
|
||||||
|
|
||||||
# Fallback: if we have a direct HTTP URL, download it directly
|
# Fallback: if we have a direct HTTP URL, download it directly
|
||||||
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
|
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
|
||||||
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
|
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
|
||||||
|
|||||||
@@ -693,6 +693,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Extract relevant fields
|
# Extract relevant fields
|
||||||
|
webpage_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
|
||||||
result_container[0] = {
|
result_container[0] = {
|
||||||
"extractor": info.get("extractor", ""),
|
"extractor": info.get("extractor", ""),
|
||||||
"title": info.get("title", ""),
|
"title": info.get("title", ""),
|
||||||
@@ -700,7 +701,9 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
|
|||||||
"duration": info.get("duration"),
|
"duration": info.get("duration"),
|
||||||
"uploader": info.get("uploader"),
|
"uploader": info.get("uploader"),
|
||||||
"description": info.get("description"),
|
"description": info.get("description"),
|
||||||
"url": url,
|
# Keep both the requested and canonical URL forms; callers should prefer webpage_url.
|
||||||
|
"requested_url": url,
|
||||||
|
"webpage_url": webpage_url,
|
||||||
}
|
}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
log(f"Probe error for {url}: {exc}")
|
log(f"Probe error for {url}: {exc}")
|
||||||
@@ -1220,9 +1223,359 @@ class Download_Media(Cmdlet):
|
|||||||
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
|
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||||
|
|
||||||
|
storage = None
|
||||||
|
hydrus_available = True
|
||||||
|
try:
|
||||||
|
from Store import Store
|
||||||
|
storage = Store(config=config or {}, suppress_debug=True)
|
||||||
|
from API.HydrusNetwork import is_hydrus_available
|
||||||
|
hydrus_available = bool(is_hydrus_available(config or {}))
|
||||||
|
except Exception:
|
||||||
|
storage = None
|
||||||
|
|
||||||
|
def _preflight_url_duplicate(candidate_url: str, extra_urls: Optional[Sequence[str]] = None) -> bool:
|
||||||
|
# NOTE: download-media sets _quiet_background_output=True when running in a pipeline to
|
||||||
|
# reduce background noise. URL de-dup is interactive and must still run in pipelines.
|
||||||
|
if storage is None:
|
||||||
|
debug("Preflight URL check skipped: storage unavailable")
|
||||||
|
return True
|
||||||
|
|
||||||
|
debug(f"Preflight URL check: candidate={candidate_url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from metadata import normalize_urls
|
||||||
|
except Exception:
|
||||||
|
normalize_urls = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
needles: List[str] = []
|
||||||
|
if normalize_urls is not None:
|
||||||
|
for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
|
||||||
|
try:
|
||||||
|
needles.extend(normalize_urls(raw))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
# Fallback: always have at least one needle
|
||||||
|
if not needles:
|
||||||
|
needles = [str(candidate_url)]
|
||||||
|
|
||||||
|
# Deduplicate needles (preserve order)
|
||||||
|
seen_needles: List[str] = []
|
||||||
|
for needle in needles:
|
||||||
|
if needle and needle not in seen_needles:
|
||||||
|
seen_needles.append(needle)
|
||||||
|
needles = seen_needles
|
||||||
|
|
||||||
|
try:
|
||||||
|
debug(f"Preflight URL needles: {needles}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
url_matches: List[Dict[str, Any]] = []
|
||||||
|
try:
|
||||||
|
from Store.HydrusNetwork import HydrusNetwork
|
||||||
|
|
||||||
|
# Avoid searching the temp/download directory backend during dedup.
|
||||||
|
# We only want to warn about duplicates in real stores.
|
||||||
|
backend_names_all = storage.list_searchable_backends()
|
||||||
|
backend_names: List[str] = []
|
||||||
|
skipped: List[str] = []
|
||||||
|
for backend_name in backend_names_all:
|
||||||
|
try:
|
||||||
|
backend = storage[backend_name]
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
if str(backend_name).strip().lower() == "temp":
|
||||||
|
skipped.append(backend_name)
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Heuristic: if a Folder backend points at the configured temp output dir, skip it.
|
||||||
|
try:
|
||||||
|
backend_location = getattr(backend, "_location", None)
|
||||||
|
if backend_location and final_output_dir:
|
||||||
|
backend_path = Path(str(backend_location)).expanduser().resolve()
|
||||||
|
temp_path = Path(str(final_output_dir)).expanduser().resolve()
|
||||||
|
if backend_path == temp_path:
|
||||||
|
skipped.append(backend_name)
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
backend_names.append(backend_name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if skipped:
|
||||||
|
debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
|
||||||
|
else:
|
||||||
|
debug(f"Preflight backends: {backend_names}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
for backend_name in backend_names:
|
||||||
|
backend = storage[backend_name]
|
||||||
|
if isinstance(backend, HydrusNetwork) and not hydrus_available:
|
||||||
|
continue
|
||||||
|
|
||||||
|
backend_hits: List[Dict[str, Any]] = []
|
||||||
|
for needle in needles:
|
||||||
|
try:
|
||||||
|
backend_hits = backend.search(f"url:{needle}", limit=25) or []
|
||||||
|
if backend_hits:
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if backend_hits:
|
||||||
|
url_matches.extend([dict(x) if isinstance(x, dict) else {"title": str(x)} for x in backend_hits])
|
||||||
|
|
||||||
|
if len(url_matches) >= 25:
|
||||||
|
url_matches = url_matches[:25]
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
url_matches = []
|
||||||
|
|
||||||
|
if not url_matches:
|
||||||
|
debug("Preflight URL check: no matches")
|
||||||
|
return True
|
||||||
|
|
||||||
|
table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
|
||||||
|
results_list: List[Dict[str, Any]] = []
|
||||||
|
for item in url_matches:
|
||||||
|
if "title" not in item:
|
||||||
|
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
|
||||||
|
table.add_result(item)
|
||||||
|
results_list.append(item)
|
||||||
|
|
||||||
|
pipeline_context.set_current_stage_table(table)
|
||||||
|
pipeline_context.set_last_result_table(table, results_list)
|
||||||
|
|
||||||
|
print(f"\n{table}")
|
||||||
|
response = input("Continue anyway? (y/n): ").strip().lower()
|
||||||
|
if response not in {"y", "yes"}:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _canonicalize_url_for_storage(requested_url: str) -> str:
|
||||||
|
# Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects).
|
||||||
|
# Fall back to the requested URL if probing fails.
|
||||||
|
# Important: when playlist item selection is used, avoid probing (can hang on large playlists).
|
||||||
|
if playlist_items:
|
||||||
|
return str(requested_url)
|
||||||
|
try:
|
||||||
|
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15)
|
||||||
|
if isinstance(pr, dict):
|
||||||
|
for key in ("webpage_url", "original_url", "url", "requested_url"):
|
||||||
|
value = pr.get(key)
|
||||||
|
if isinstance(value, str) and value.strip():
|
||||||
|
return value.strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return str(requested_url)
|
||||||
|
|
||||||
# Check if we need to show format selection
|
# Check if we need to show format selection
|
||||||
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
|
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
|
||||||
ytdl_format = parsed.get("format")
|
ytdl_format = parsed.get("format")
|
||||||
|
playlist_selection_handled = False
|
||||||
|
|
||||||
|
def _parse_at_selection(choice: str, *, max_index: int) -> Optional[List[int]]:
|
||||||
|
"""Parse @ selection syntax (@2, @2-5, @{1,3,5}, @2,5,7) into 1-based indices."""
|
||||||
|
raw = str(choice or "").strip()
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if raw.lower() in {"q", "quit", "cancel"}:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if raw == "@*" or raw == "*":
|
||||||
|
return list(range(1, max_index + 1))
|
||||||
|
|
||||||
|
if raw.startswith("@"):
|
||||||
|
raw = raw[1:].strip()
|
||||||
|
|
||||||
|
if raw.startswith("{") and raw.endswith("}"):
|
||||||
|
raw = raw[1:-1].strip()
|
||||||
|
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
|
||||||
|
indices: set[int] = set()
|
||||||
|
for part in raw.split(","):
|
||||||
|
part = part.strip()
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
if "-" in part:
|
||||||
|
left, right = [p.strip() for p in part.split("-", 1)]
|
||||||
|
if not left or not right:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
start = int(left)
|
||||||
|
end = int(right)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if start < 1 or end < 1:
|
||||||
|
return None
|
||||||
|
if end < start:
|
||||||
|
start, end = end, start
|
||||||
|
for i in range(start, end + 1):
|
||||||
|
if 1 <= i <= max_index:
|
||||||
|
indices.add(i)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
i = int(part)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if 1 <= i <= max_index:
|
||||||
|
indices.add(i)
|
||||||
|
if not indices:
|
||||||
|
return None
|
||||||
|
return sorted(indices)
|
||||||
|
|
||||||
|
def _maybe_prompt_playlist_items(url: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""If URL appears to be a playlist/channel/collection, prompt user for @ selection.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- None if URL is not a playlist-like multi-entry page (or probe fails)
|
||||||
|
- Dict with keys:
|
||||||
|
- cancel: bool
|
||||||
|
- playlist_items: Optional[str] (None means download all)
|
||||||
|
- selected_urls: Optional[List[str]] (expanded per-entry urls when available)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
pr = probe_url(url, no_playlist=False, timeout_seconds=15)
|
||||||
|
except Exception:
|
||||||
|
pr = None
|
||||||
|
if not isinstance(pr, dict):
|
||||||
|
return None
|
||||||
|
entries = pr.get("entries")
|
||||||
|
if not isinstance(entries, list) or len(entries) <= 1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Display table (limit rows to keep output reasonable)
|
||||||
|
max_rows = 200
|
||||||
|
display_entries = entries[:max_rows]
|
||||||
|
total = len(entries)
|
||||||
|
|
||||||
|
def _entry_to_url(entry: Any) -> Optional[str]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
return None
|
||||||
|
# Prefer explicit absolute URLs when present
|
||||||
|
for key in ("webpage_url", "original_url", "url"):
|
||||||
|
v = entry.get(key)
|
||||||
|
if isinstance(v, str) and v.strip():
|
||||||
|
s = v.strip()
|
||||||
|
try:
|
||||||
|
if urlparse(s).scheme in {"http", "https"}:
|
||||||
|
return s
|
||||||
|
except Exception:
|
||||||
|
return s
|
||||||
|
|
||||||
|
# Best-effort YouTube fallback from id
|
||||||
|
entry_id = entry.get("id")
|
||||||
|
if isinstance(entry_id, str) and entry_id.strip():
|
||||||
|
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").lower()
|
||||||
|
if "youtube" in extractor_name:
|
||||||
|
return f"https://www.youtube.com/watch?v={entry_id.strip()}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
table = ResultTable()
|
||||||
|
table.title = f"Playlist items ({total}{' shown ' + str(len(display_entries)) if total > max_rows else ''})"
|
||||||
|
table.set_source_command("download-media", [url])
|
||||||
|
try:
|
||||||
|
table.set_preserve_order(True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
results_list: List[Dict[str, Any]] = []
|
||||||
|
for idx, entry in enumerate(display_entries, 1):
|
||||||
|
title = None
|
||||||
|
uploader = None
|
||||||
|
duration = None
|
||||||
|
try:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
title = entry.get("title")
|
||||||
|
uploader = entry.get("uploader") or pr.get("uploader")
|
||||||
|
duration = entry.get("duration")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
row: Dict[str, Any] = {
|
||||||
|
"table": "download-media",
|
||||||
|
"title": str(title or f"Item {idx}"),
|
||||||
|
"detail": str(uploader or ""),
|
||||||
|
"media_kind": "playlist-item",
|
||||||
|
"playlist_index": idx,
|
||||||
|
"columns": [
|
||||||
|
("#", str(idx)),
|
||||||
|
("Title", str(title or "")),
|
||||||
|
("Duration", str(duration or "")),
|
||||||
|
("Uploader", str(uploader or "")),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
results_list.append(row)
|
||||||
|
table.add_result(row)
|
||||||
|
|
||||||
|
pipeline_context.set_current_stage_table(table)
|
||||||
|
pipeline_context.set_last_result_table(table, results_list)
|
||||||
|
|
||||||
|
print(f"\n{table}")
|
||||||
|
choice = input("Select items to download (@N, @2-5, @{1,3}, @*, or 'q' to cancel): ").strip()
|
||||||
|
if not choice or choice.lower() in {"q", "quit", "cancel"}:
|
||||||
|
return {"cancel": True, "playlist_items": None, "selected_urls": []}
|
||||||
|
if choice.strip() == "@*" or choice.strip() == "*":
|
||||||
|
# @* means all entries, not just displayed rows.
|
||||||
|
selected_urls: List[str] = []
|
||||||
|
for entry in entries:
|
||||||
|
u = _entry_to_url(entry)
|
||||||
|
if u and u not in selected_urls:
|
||||||
|
selected_urls.append(u)
|
||||||
|
# Only expand when we can derive URLs for all entries; otherwise fall back to yt-dlp playlist handling.
|
||||||
|
if len(selected_urls) == len(entries):
|
||||||
|
return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls}
|
||||||
|
return {"cancel": False, "playlist_items": None, "selected_urls": []}
|
||||||
|
|
||||||
|
parsed_indices = _parse_at_selection(choice, max_index=len(display_entries))
|
||||||
|
if not parsed_indices:
|
||||||
|
log("Invalid selection. Use @N, @2-5, @{1,3}, or @*", file=sys.stderr)
|
||||||
|
return {"cancel": True, "playlist_items": None, "selected_urls": []}
|
||||||
|
|
||||||
|
selected_urls: List[str] = []
|
||||||
|
for i in parsed_indices:
|
||||||
|
try:
|
||||||
|
entry = display_entries[i - 1]
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
u = _entry_to_url(entry)
|
||||||
|
if u and u not in selected_urls:
|
||||||
|
selected_urls.append(u)
|
||||||
|
|
||||||
|
# If we can expand per-entry URLs, return them.
|
||||||
|
if selected_urls and len(selected_urls) == len(parsed_indices):
|
||||||
|
return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls}
|
||||||
|
|
||||||
|
# yt-dlp accepts comma-separated 1-based indices for playlist_items
|
||||||
|
return {"cancel": False, "playlist_items": ",".join(str(i) for i in parsed_indices), "selected_urls": []}
|
||||||
|
|
||||||
|
# Playlist/multi-entry detection: if the URL has multiple items and the user didn't
|
||||||
|
# specify -item, prompt for @ selection (supports @* for all).
|
||||||
|
if len(supported_url) == 1 and not playlist_items and not ytdl_format:
|
||||||
|
candidate_url = supported_url[0]
|
||||||
|
selection_info = _maybe_prompt_playlist_items(candidate_url)
|
||||||
|
if selection_info is not None:
|
||||||
|
playlist_selection_handled = True
|
||||||
|
if bool(selection_info.get("cancel")):
|
||||||
|
return 0
|
||||||
|
selected_urls = selection_info.get("selected_urls")
|
||||||
|
if isinstance(selected_urls, list) and selected_urls:
|
||||||
|
# Expand playlist/channel URL into per-entry URLs so that de-dup preflight
|
||||||
|
# and downloads operate per file.
|
||||||
|
supported_url = selected_urls
|
||||||
|
playlist_items = None
|
||||||
|
else:
|
||||||
|
playlist_items = selection_info.get("playlist_items")
|
||||||
|
|
||||||
# If no -item, no explicit -format specified, and single URL, show the format table.
|
# If no -item, no explicit -format specified, and single URL, show the format table.
|
||||||
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
|
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
|
||||||
@@ -1232,8 +1585,15 @@ class Download_Media(Cmdlet):
|
|||||||
and not playlist_items
|
and not playlist_items
|
||||||
and not ytdl_format
|
and not ytdl_format
|
||||||
and len(supported_url) == 1
|
and len(supported_url) == 1
|
||||||
|
and not playlist_selection_handled
|
||||||
):
|
):
|
||||||
url = supported_url[0]
|
url = supported_url[0]
|
||||||
|
|
||||||
|
canonical_url = _canonicalize_url_for_storage(url)
|
||||||
|
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
|
||||||
|
log(f"Skipping download: {url}", file=sys.stderr)
|
||||||
|
return 0
|
||||||
|
|
||||||
formats = list_formats(url, no_playlist=False)
|
formats = list_formats(url, no_playlist=False)
|
||||||
|
|
||||||
if formats and len(formats) > 1:
|
if formats and len(formats) > 1:
|
||||||
@@ -1379,12 +1739,18 @@ class Download_Media(Cmdlet):
|
|||||||
# Download each URL
|
# Download each URL
|
||||||
downloaded_count = 0
|
downloaded_count = 0
|
||||||
clip_sections_spec = self._build_clip_sections_spec(clip_range)
|
clip_sections_spec = self._build_clip_sections_spec(clip_range)
|
||||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
|
||||||
|
|
||||||
for url in supported_url:
|
for url in supported_url:
|
||||||
try:
|
try:
|
||||||
debug(f"Processing: {url}")
|
debug(f"Processing: {url}")
|
||||||
|
|
||||||
|
canonical_url = _canonicalize_url_for_storage(url)
|
||||||
|
|
||||||
|
# Preflight: warn if URL already exists in storage backends.
|
||||||
|
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
|
||||||
|
log(f"Skipping download: {url}", file=sys.stderr)
|
||||||
|
continue
|
||||||
|
|
||||||
# If playlist_items is specified but looks like a format ID (e.g. from table selection),
|
# If playlist_items is specified but looks like a format ID (e.g. from table selection),
|
||||||
# treat it as a format selector instead of playlist items.
|
# treat it as a format selector instead of playlist items.
|
||||||
# This handles the case where @N selection passes -item <format_id>
|
# This handles the case where @N selection passes -item <format_id>
|
||||||
@@ -1532,24 +1898,17 @@ class Download_Media(Cmdlet):
|
|||||||
if title and f"title:{title}" not in tag:
|
if title and f"title:{title}" not in tag:
|
||||||
tag.insert(0, f"title:{title}")
|
tag.insert(0, f"title:{title}")
|
||||||
|
|
||||||
# Build a single canonical URL field; prefer yt-dlp provided webpage_url or info.url,
|
# Store the canonical URL for de-dup/search purposes.
|
||||||
# but fall back to the original requested URL. If multiple unique urls are available,
|
# Prefer yt-dlp's webpage_url, and do not mix in the raw requested URL (which may contain timestamps).
|
||||||
# join them into a comma-separated string.
|
final_url = None
|
||||||
urls_to_consider: List[str] = []
|
|
||||||
try:
|
try:
|
||||||
page_url = info.get("webpage_url") or info.get("url")
|
page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
|
||||||
if page_url:
|
if page_url:
|
||||||
urls_to_consider.append(str(page_url))
|
final_url = str(page_url)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
final_url = None
|
||||||
if url:
|
if not final_url and url:
|
||||||
urls_to_consider.append(str(url))
|
final_url = str(url)
|
||||||
|
|
||||||
seen_urls: List[str] = []
|
|
||||||
for u in urls_to_consider:
|
|
||||||
if u and u not in seen_urls:
|
|
||||||
seen_urls.append(u)
|
|
||||||
final_url = ",".join(seen_urls) if seen_urls else None
|
|
||||||
|
|
||||||
# Construct canonical PipeObject dict: hash, store, path, url, title, tags
|
# Construct canonical PipeObject dict: hash, store, path, url, title, tags
|
||||||
# Prefer explicit backend names (storage_name/storage_location). If none, default to PATH
|
# Prefer explicit backend names (storage_name/storage_location). If none, default to PATH
|
||||||
@@ -1561,6 +1920,7 @@ class Download_Media(Cmdlet):
|
|||||||
"url": final_url,
|
"url": final_url,
|
||||||
"tag": tag,
|
"tag": tag,
|
||||||
"action": "cmdlet:download-media",
|
"action": "cmdlet:download-media",
|
||||||
|
"is_temp": True,
|
||||||
# download_mode removed (deprecated), keep media_kind
|
# download_mode removed (deprecated), keep media_kind
|
||||||
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
|
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
|
||||||
"media_kind": "video" if opts.mode == "video" else "audio",
|
"media_kind": "video" if opts.mode == "video" else "audio",
|
||||||
|
|||||||
@@ -184,6 +184,32 @@ class Get_Metadata(Cmdlet):
|
|||||||
mime_type = metadata.get("mime") or metadata.get("ext", "")
|
mime_type = metadata.get("mime") or metadata.get("ext", "")
|
||||||
file_size = metadata.get("size")
|
file_size = metadata.get("size")
|
||||||
duration_seconds = metadata.get("duration")
|
duration_seconds = metadata.get("duration")
|
||||||
|
if duration_seconds is None:
|
||||||
|
duration_seconds = metadata.get("duration_seconds")
|
||||||
|
if duration_seconds is None:
|
||||||
|
duration_seconds = metadata.get("length")
|
||||||
|
if duration_seconds is None and isinstance(metadata.get("duration_ms"), (int, float)):
|
||||||
|
try:
|
||||||
|
duration_seconds = float(metadata["duration_ms"]) / 1000.0
|
||||||
|
except Exception:
|
||||||
|
duration_seconds = None
|
||||||
|
|
||||||
|
if isinstance(duration_seconds, str):
|
||||||
|
s = duration_seconds.strip()
|
||||||
|
if s:
|
||||||
|
try:
|
||||||
|
duration_seconds = float(s)
|
||||||
|
except ValueError:
|
||||||
|
if ":" in s:
|
||||||
|
parts = [p.strip() for p in s.split(":") if p.strip()]
|
||||||
|
if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
|
||||||
|
nums = [int(p) for p in parts]
|
||||||
|
if len(nums) == 2:
|
||||||
|
duration_seconds = float(nums[0] * 60 + nums[1])
|
||||||
|
else:
|
||||||
|
duration_seconds = float(nums[0] * 3600 + nums[1] * 60 + nums[2])
|
||||||
|
else:
|
||||||
|
duration_seconds = None
|
||||||
pages = metadata.get("pages")
|
pages = metadata.get("pages")
|
||||||
url = metadata.get("url") or []
|
url = metadata.get("url") or []
|
||||||
imported_ts = self._extract_imported_ts(metadata)
|
imported_ts = self._extract_imported_ts(metadata)
|
||||||
|
|||||||
@@ -12,7 +12,13 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from SYS.logger import log, debug
|
try:
|
||||||
|
from Provider.openlibrary import OpenLibrary
|
||||||
|
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
|
||||||
|
_ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
|
||||||
|
except Exception:
|
||||||
|
_ol_scrape_isbn_metadata = None # type: ignore[assignment]
|
||||||
|
_ol_scrape_openlibrary_metadata = None # type: ignore[assignment]
|
||||||
from Provider.metadata_provider import get_metadata_provider, list_metadata_providers
|
from Provider.metadata_provider import get_metadata_provider, list_metadata_providers
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -31,6 +37,10 @@ except ImportError:
|
|||||||
extract_title = None
|
extract_title = None
|
||||||
|
|
||||||
|
|
||||||
|
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
|
||||||
|
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -691,249 +701,22 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:
|
|||||||
|
|
||||||
|
|
||||||
def _scrape_isbn_metadata(isbn: str) -> List[str]:
|
def _scrape_isbn_metadata(isbn: str) -> List[str]:
|
||||||
"""Scrape metadata for an ISBN using Open Library API."""
|
if _ol_scrape_isbn_metadata is None:
|
||||||
new_tags = []
|
log("OpenLibrary scraper unavailable", file=sys.stderr)
|
||||||
|
return []
|
||||||
try:
|
try:
|
||||||
from ..API.HTTP import HTTPClient
|
return list(_ol_scrape_isbn_metadata(isbn))
|
||||||
import json as json_module
|
|
||||||
|
|
||||||
isbn_clean = isbn.replace('-', '').strip()
|
|
||||||
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
|
|
||||||
|
|
||||||
try:
|
|
||||||
with HTTPClient() as client:
|
|
||||||
response = client.get(url)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = json_module.loads(response.content.decode('utf-8'))
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr)
|
|
||||||
return []
|
|
||||||
|
|
||||||
if not data:
|
|
||||||
log(f"No ISBN metadata found for: {isbn}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
book_data = next(iter(data.values()), None)
|
|
||||||
if not book_data:
|
|
||||||
return []
|
|
||||||
|
|
||||||
if 'title' in book_data:
|
|
||||||
new_tags.append(f"title:{book_data['title']}")
|
|
||||||
|
|
||||||
if 'authors' in book_data and isinstance(book_data['authors'], list):
|
|
||||||
for author in book_data['authors'][:3]:
|
|
||||||
if 'name' in author:
|
|
||||||
new_tags.append(f"author:{author['name']}")
|
|
||||||
|
|
||||||
if 'publish_date' in book_data:
|
|
||||||
new_tags.append(f"publish_date:{book_data['publish_date']}")
|
|
||||||
|
|
||||||
if 'publishers' in book_data and isinstance(book_data['publishers'], list):
|
|
||||||
for pub in book_data['publishers'][:1]:
|
|
||||||
if 'name' in pub:
|
|
||||||
new_tags.append(f"publisher:{pub['name']}")
|
|
||||||
|
|
||||||
if 'description' in book_data:
|
|
||||||
desc = book_data['description']
|
|
||||||
if isinstance(desc, dict) and 'value' in desc:
|
|
||||||
desc = desc['value']
|
|
||||||
if desc:
|
|
||||||
desc_str = str(desc).strip()
|
|
||||||
# Include description if available (limit to 200 chars to keep it manageable)
|
|
||||||
if len(desc_str) > 0:
|
|
||||||
new_tags.append(f"description:{desc_str[:200]}")
|
|
||||||
|
|
||||||
if 'number_of_pages' in book_data:
|
|
||||||
page_count = book_data['number_of_pages']
|
|
||||||
if page_count and isinstance(page_count, int) and page_count > 0:
|
|
||||||
new_tags.append(f"pages:{page_count}")
|
|
||||||
|
|
||||||
if 'identifiers' in book_data and isinstance(book_data['identifiers'], dict):
|
|
||||||
identifiers = book_data['identifiers']
|
|
||||||
|
|
||||||
if 'openlibrary' in identifiers:
|
|
||||||
ol_ids = identifiers['openlibrary']
|
|
||||||
if isinstance(ol_ids, list) and ol_ids:
|
|
||||||
new_tags.append(f"openlibrary:{ol_ids[0]}")
|
|
||||||
elif isinstance(ol_ids, str):
|
|
||||||
new_tags.append(f"openlibrary:{ol_ids}")
|
|
||||||
|
|
||||||
if 'lccn' in identifiers:
|
|
||||||
lccn_list = identifiers['lccn']
|
|
||||||
if isinstance(lccn_list, list) and lccn_list:
|
|
||||||
new_tags.append(f"lccn:{lccn_list[0]}")
|
|
||||||
elif isinstance(lccn_list, str):
|
|
||||||
new_tags.append(f"lccn:{lccn_list}")
|
|
||||||
|
|
||||||
if 'oclc' in identifiers:
|
|
||||||
oclc_list = identifiers['oclc']
|
|
||||||
if isinstance(oclc_list, list) and oclc_list:
|
|
||||||
new_tags.append(f"oclc:{oclc_list[0]}")
|
|
||||||
elif isinstance(oclc_list, str):
|
|
||||||
new_tags.append(f"oclc:{oclc_list}")
|
|
||||||
|
|
||||||
if 'goodreads' in identifiers:
|
|
||||||
goodreads_list = identifiers['goodreads']
|
|
||||||
if isinstance(goodreads_list, list) and goodreads_list:
|
|
||||||
new_tags.append(f"goodreads:{goodreads_list[0]}")
|
|
||||||
elif isinstance(goodreads_list, str):
|
|
||||||
new_tags.append(f"goodreads:{goodreads_list}")
|
|
||||||
|
|
||||||
if 'librarything' in identifiers:
|
|
||||||
lt_list = identifiers['librarything']
|
|
||||||
if isinstance(lt_list, list) and lt_list:
|
|
||||||
new_tags.append(f"librarything:{lt_list[0]}")
|
|
||||||
elif isinstance(lt_list, str):
|
|
||||||
new_tags.append(f"librarything:{lt_list}")
|
|
||||||
|
|
||||||
if 'doi' in identifiers:
|
|
||||||
doi_list = identifiers['doi']
|
|
||||||
if isinstance(doi_list, list) and doi_list:
|
|
||||||
new_tags.append(f"doi:{doi_list[0]}")
|
|
||||||
elif isinstance(doi_list, str):
|
|
||||||
new_tags.append(f"doi:{doi_list}")
|
|
||||||
|
|
||||||
if 'internet_archive' in identifiers:
|
|
||||||
ia_list = identifiers['internet_archive']
|
|
||||||
if isinstance(ia_list, list) and ia_list:
|
|
||||||
new_tags.append(f"internet_archive:{ia_list[0]}")
|
|
||||||
elif isinstance(ia_list, str):
|
|
||||||
new_tags.append(f"internet_archive:{ia_list}")
|
|
||||||
|
|
||||||
log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
|
|
||||||
return new_tags
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log(f"ISBN scraping error: {e}", file=sys.stderr)
|
log(f"ISBN scraping error: {e}", file=sys.stderr)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _scrape_openlibrary_metadata(olid: str) -> List[str]:
|
def _scrape_openlibrary_metadata(olid: str) -> List[str]:
|
||||||
"""Scrape metadata for an OpenLibrary ID using the .json API endpoint.
|
if _ol_scrape_openlibrary_metadata is None:
|
||||||
|
log("OpenLibrary scraper unavailable", file=sys.stderr)
|
||||||
Fetches from https://openlibrary.org/books/{OLID}.json and extracts:
|
|
||||||
- Title, authors, publish date, publishers
|
|
||||||
- Description
|
|
||||||
- Subjects as freeform tags (without namespace prefix)
|
|
||||||
- Identifiers (ISBN, LCCN, OCLC, etc.)
|
|
||||||
"""
|
|
||||||
new_tags = []
|
|
||||||
try:
|
|
||||||
from ..API.HTTP import HTTPClient
|
|
||||||
import json as json_module
|
|
||||||
|
|
||||||
# Format: OL9674499M or just 9674499M
|
|
||||||
olid_clean = olid.replace('OL', '').replace('M', '')
|
|
||||||
if not olid_clean.isdigit():
|
|
||||||
olid_clean = olid
|
|
||||||
|
|
||||||
# Ensure we have the full OLID format for the URL
|
|
||||||
if not olid.startswith('OL'):
|
|
||||||
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
|
|
||||||
else:
|
|
||||||
url = f"https://openlibrary.org/books/{olid}.json"
|
|
||||||
|
|
||||||
try:
|
|
||||||
with HTTPClient() as client:
|
|
||||||
response = client.get(url)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = json_module.loads(response.content.decode('utf-8'))
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr)
|
|
||||||
return []
|
return []
|
||||||
|
try:
|
||||||
if not data:
|
return list(_ol_scrape_openlibrary_metadata(olid))
|
||||||
log(f"No OpenLibrary metadata found for: {olid}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Add title
|
|
||||||
if 'title' in data:
|
|
||||||
new_tags.append(f"title:{data['title']}")
|
|
||||||
|
|
||||||
# Add authors
|
|
||||||
if 'authors' in data and isinstance(data['authors'], list):
|
|
||||||
for author in data['authors'][:3]:
|
|
||||||
if isinstance(author, dict) and 'name' in author:
|
|
||||||
new_tags.append(f"author:{author['name']}")
|
|
||||||
elif isinstance(author, str):
|
|
||||||
new_tags.append(f"author:{author}")
|
|
||||||
|
|
||||||
# Add publish date
|
|
||||||
if 'publish_date' in data:
|
|
||||||
new_tags.append(f"publish_date:{data['publish_date']}")
|
|
||||||
|
|
||||||
# Add publishers
|
|
||||||
if 'publishers' in data and isinstance(data['publishers'], list):
|
|
||||||
for pub in data['publishers'][:1]:
|
|
||||||
if isinstance(pub, dict) and 'name' in pub:
|
|
||||||
new_tags.append(f"publisher:{pub['name']}")
|
|
||||||
elif isinstance(pub, str):
|
|
||||||
new_tags.append(f"publisher:{pub}")
|
|
||||||
|
|
||||||
# Add description
|
|
||||||
if 'description' in data:
|
|
||||||
desc = data['description']
|
|
||||||
if isinstance(desc, dict) and 'value' in desc:
|
|
||||||
desc = desc['value']
|
|
||||||
if desc:
|
|
||||||
desc_str = str(desc).strip()
|
|
||||||
if len(desc_str) > 0:
|
|
||||||
new_tags.append(f"description:{desc_str[:200]}")
|
|
||||||
|
|
||||||
# Add number of pages
|
|
||||||
if 'number_of_pages' in data:
|
|
||||||
page_count = data['number_of_pages']
|
|
||||||
if page_count and isinstance(page_count, int) and page_count > 0:
|
|
||||||
new_tags.append(f"pages:{page_count}")
|
|
||||||
|
|
||||||
# Add subjects as FREEFORM tags (no namespace prefix)
|
|
||||||
if 'subjects' in data and isinstance(data['subjects'], list):
|
|
||||||
for subject in data['subjects'][:10]:
|
|
||||||
if subject and isinstance(subject, str):
|
|
||||||
subject_clean = str(subject).strip()
|
|
||||||
if subject_clean and subject_clean not in new_tags:
|
|
||||||
new_tags.append(subject_clean)
|
|
||||||
|
|
||||||
# Add identifiers
|
|
||||||
if 'identifiers' in data and isinstance(data['identifiers'], dict):
|
|
||||||
identifiers = data['identifiers']
|
|
||||||
|
|
||||||
if 'isbn_10' in identifiers:
|
|
||||||
isbn_10_list = identifiers['isbn_10']
|
|
||||||
if isinstance(isbn_10_list, list) and isbn_10_list:
|
|
||||||
new_tags.append(f"isbn_10:{isbn_10_list[0]}")
|
|
||||||
elif isinstance(isbn_10_list, str):
|
|
||||||
new_tags.append(f"isbn_10:{isbn_10_list}")
|
|
||||||
|
|
||||||
if 'isbn_13' in identifiers:
|
|
||||||
isbn_13_list = identifiers['isbn_13']
|
|
||||||
if isinstance(isbn_13_list, list) and isbn_13_list:
|
|
||||||
new_tags.append(f"isbn_13:{isbn_13_list[0]}")
|
|
||||||
elif isinstance(isbn_13_list, str):
|
|
||||||
new_tags.append(f"isbn_13:{isbn_13_list}")
|
|
||||||
|
|
||||||
if 'lccn' in identifiers:
|
|
||||||
lccn_list = identifiers['lccn']
|
|
||||||
if isinstance(lccn_list, list) and lccn_list:
|
|
||||||
new_tags.append(f"lccn:{lccn_list[0]}")
|
|
||||||
elif isinstance(lccn_list, str):
|
|
||||||
new_tags.append(f"lccn:{lccn_list}")
|
|
||||||
|
|
||||||
if 'oclc_numbers' in identifiers:
|
|
||||||
oclc_list = identifiers['oclc_numbers']
|
|
||||||
if isinstance(oclc_list, list) and oclc_list:
|
|
||||||
new_tags.append(f"oclc:{oclc_list[0]}")
|
|
||||||
elif isinstance(oclc_list, str):
|
|
||||||
new_tags.append(f"oclc:{oclc_list}")
|
|
||||||
|
|
||||||
if 'goodreads' in identifiers:
|
|
||||||
goodreads_list = identifiers['goodreads']
|
|
||||||
if isinstance(goodreads_list, list) and goodreads_list:
|
|
||||||
new_tags.append(f"goodreads:{goodreads_list[0]}")
|
|
||||||
elif isinstance(goodreads_list, str):
|
|
||||||
new_tags.append(f"goodreads:{goodreads_list}")
|
|
||||||
|
|
||||||
log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
|
|
||||||
return new_tags
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
|
log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -1,28 +1,40 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, Dict, Sequence
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Sequence
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import register
|
|
||||||
import pipeline as ctx
|
import pipeline as ctx
|
||||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
from ._shared import Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||||
from SYS.logger import log
|
from SYS.logger import log
|
||||||
from Store import Store
|
from Store import Store
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class UrlItem:
|
||||||
|
url: str
|
||||||
|
hash: str
|
||||||
|
store: str
|
||||||
|
|
||||||
|
|
||||||
class Get_Url(Cmdlet):
|
class Get_Url(Cmdlet):
|
||||||
"""Get url associated with files via hash+store."""
|
"""Get url associated with files via hash+store."""
|
||||||
|
|
||||||
NAME = "get-url"
|
def __init__(self) -> None:
|
||||||
SUMMARY = "List url associated with a file"
|
super().__init__(
|
||||||
USAGE = "@1 | get-url"
|
name="get-url",
|
||||||
ARGS = [
|
summary="List url associated with a file",
|
||||||
|
usage="@1 | get-url",
|
||||||
|
arg=[
|
||||||
SharedArgs.HASH,
|
SharedArgs.HASH,
|
||||||
SharedArgs.STORE,
|
SharedArgs.STORE,
|
||||||
]
|
],
|
||||||
DETAIL = [
|
detail=[
|
||||||
"- Lists all url associated with file identified by hash+store",
|
"- Lists all url associated with file identified by hash+store",
|
||||||
]
|
],
|
||||||
|
exec=self.run,
|
||||||
|
)
|
||||||
|
self.register()
|
||||||
|
|
||||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
"""Get url for file via hash+store backend."""
|
"""Get url for file via hash+store backend."""
|
||||||
@@ -53,17 +65,33 @@ class Get_Url(Cmdlet):
|
|||||||
|
|
||||||
urls = backend.get_url(file_hash)
|
urls = backend.get_url(file_hash)
|
||||||
|
|
||||||
if urls:
|
from result_table import ResultTable
|
||||||
for u in urls:
|
|
||||||
# Emit rich object for pipeline compatibility
|
title = str(get_field(result, "title") or "").strip()
|
||||||
ctx.emit({
|
table_title = "Title"
|
||||||
"url": u,
|
if title:
|
||||||
"hash": file_hash,
|
table_title = f"Title: {title}"
|
||||||
"store": store_name,
|
|
||||||
})
|
table = ResultTable(table_title, max_columns=1).set_preserve_order(True)
|
||||||
return 0
|
table.set_source_command("get-url", [])
|
||||||
else:
|
|
||||||
ctx.emit("No url found")
|
items: List[UrlItem] = []
|
||||||
|
for u in list(urls or []):
|
||||||
|
u = str(u or "").strip()
|
||||||
|
if not u:
|
||||||
|
continue
|
||||||
|
row = table.add_row()
|
||||||
|
row.add_column("Url", u)
|
||||||
|
item = UrlItem(url=u, hash=file_hash, store=str(store_name))
|
||||||
|
items.append(item)
|
||||||
|
ctx.emit(item)
|
||||||
|
|
||||||
|
# Make this a real result table so @.. / @,, can navigate it
|
||||||
|
ctx.set_last_result_table(table if items else None, items, subject=result)
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
log("No url found", file=sys.stderr)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@@ -74,7 +102,6 @@ class Get_Url(Cmdlet):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
# Register cmdlet
|
CMDLET = Get_Url()
|
||||||
register(["get-url", "get_url"])(Get_Url)
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import Any, Dict, Sequence, List, Optional, Tuple
|
from typing import Any, Dict, Sequence, List, Optional, Tuple
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
@@ -11,57 +10,9 @@ import sys
|
|||||||
|
|
||||||
from SYS.logger import log, debug
|
from SYS.logger import log, debug
|
||||||
|
|
||||||
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
|
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help, normalize_hash, first_title_tag
|
||||||
import pipeline as ctx
|
import pipeline as ctx
|
||||||
|
|
||||||
# Optional dependencies
|
|
||||||
try:
|
|
||||||
import mutagen # type: ignore
|
|
||||||
except ImportError: # pragma: no cover
|
|
||||||
mutagen = None # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
from config import get_hydrus_url, resolve_output_dir
|
|
||||||
except Exception: # pragma: no cover
|
|
||||||
get_hydrus_url = None # type: ignore
|
|
||||||
resolve_output_dir = None # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
|
|
||||||
except ImportError: # pragma: no cover
|
|
||||||
HydrusNetwork = None # type: ignore
|
|
||||||
HydrusRequestError = RuntimeError # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
from SYS.utils import sha256_file
|
|
||||||
except ImportError: # pragma: no cover
|
|
||||||
sha256_file = None # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
from SYS.utils_constant import mime_maps
|
|
||||||
except ImportError: # pragma: no cover
|
|
||||||
mime_maps = {} # type: ignore
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
|
||||||
class SearchRecord:
|
|
||||||
path: str
|
|
||||||
size_bytes: int | None = None
|
|
||||||
duration_seconds: str | None = None
|
|
||||||
tag: str | None = None
|
|
||||||
hash: str | None = None
|
|
||||||
|
|
||||||
def as_dict(self) -> dict[str, str]:
|
|
||||||
payload: dict[str, str] = {"path": self.path}
|
|
||||||
if self.size_bytes is not None:
|
|
||||||
payload["size"] = str(self.size_bytes)
|
|
||||||
if self.duration_seconds:
|
|
||||||
payload["duration"] = self.duration_seconds
|
|
||||||
if self.tag:
|
|
||||||
payload["tag"] = self.tag
|
|
||||||
if self.hash:
|
|
||||||
payload["hash"] = self.hash
|
|
||||||
return payload
|
|
||||||
|
|
||||||
|
|
||||||
STORAGE_ORIGINS = {"local", "hydrus", "folder"}
|
STORAGE_ORIGINS = {"local", "hydrus", "folder"}
|
||||||
|
|
||||||
@@ -86,12 +37,15 @@ class Search_Store(Cmdlet):
|
|||||||
detail=[
|
detail=[
|
||||||
"Search across storage backends: Folder stores and Hydrus instances",
|
"Search across storage backends: Folder stores and Hydrus instances",
|
||||||
"Use -store to search a specific backend by name",
|
"Use -store to search a specific backend by name",
|
||||||
|
"URL search: url:* (any URL) or url:<value> (URL substring)",
|
||||||
"Filter results by: tag, size, type, duration",
|
"Filter results by: tag, size, type, duration",
|
||||||
"Results include hash for downstream commands (get-file, add-tag, etc.)",
|
"Results include hash for downstream commands (get-file, add-tag, etc.)",
|
||||||
"Examples:",
|
"Examples:",
|
||||||
"search-store foo # Search all storage backends",
|
"search-store foo # Search all storage backends",
|
||||||
"search-store -store home '*' # Search 'home' Hydrus instance",
|
"search-store -store home '*' # Search 'home' Hydrus instance",
|
||||||
"search-store -store test 'video' # Search 'test' folder store",
|
"search-store -store test 'video' # Search 'test' folder store",
|
||||||
|
"search-store 'url:*' # Files that have any URL",
|
||||||
|
"search-store 'url:youtube.com' # Files whose URL contains substring",
|
||||||
"search-store song -type audio # Search for audio files",
|
"search-store song -type audio # Search for audio files",
|
||||||
"search-store movie -tag action # Search with tag filter",
|
"search-store movie -tag action # Search with tag filter",
|
||||||
],
|
],
|
||||||
@@ -100,6 +54,40 @@ class Search_Store(Cmdlet):
|
|||||||
self.register()
|
self.register()
|
||||||
|
|
||||||
# --- Helper methods -------------------------------------------------
|
# --- Helper methods -------------------------------------------------
|
||||||
|
@staticmethod
|
||||||
|
def _parse_hash_query(query: str) -> List[str]:
|
||||||
|
"""Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.
|
||||||
|
|
||||||
|
Supported examples:
|
||||||
|
- hash:<h1>,<h2>,<h3>
|
||||||
|
- Hash: <h1> <h2> <h3>
|
||||||
|
- hash:{<h1>, <h2>}
|
||||||
|
"""
|
||||||
|
q = str(query or "").strip()
|
||||||
|
if not q:
|
||||||
|
return []
|
||||||
|
|
||||||
|
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
|
||||||
|
if not m:
|
||||||
|
return []
|
||||||
|
|
||||||
|
rest = (m.group(1) or "").strip()
|
||||||
|
if rest.startswith("{") and rest.endswith("}"):
|
||||||
|
rest = rest[1:-1].strip()
|
||||||
|
if rest.startswith("[") and rest.endswith("]"):
|
||||||
|
rest = rest[1:-1].strip()
|
||||||
|
|
||||||
|
# Split on commas and whitespace.
|
||||||
|
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
|
||||||
|
out: List[str] = []
|
||||||
|
for part in raw_parts:
|
||||||
|
h = normalize_hash(part)
|
||||||
|
if not h:
|
||||||
|
continue
|
||||||
|
if h not in out:
|
||||||
|
out.append(h)
|
||||||
|
return out
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _normalize_extension(ext_value: Any) -> str:
|
def _normalize_extension(ext_value: Any) -> str:
|
||||||
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
|
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
|
||||||
@@ -150,10 +138,10 @@ class Search_Store(Cmdlet):
|
|||||||
|
|
||||||
# Parse arguments
|
# Parse arguments
|
||||||
query = ""
|
query = ""
|
||||||
tag_filters: List[str] = []
|
_tag_filters: List[str] = []
|
||||||
size_filter: Optional[Tuple[str, int]] = None
|
_size_filter: Optional[Tuple[str, int]] = None
|
||||||
duration_filter: Optional[Tuple[str, float]] = None
|
_duration_filter: Optional[Tuple[str, float]] = None
|
||||||
type_filter: Optional[str] = None
|
_type_filter: Optional[str] = None
|
||||||
storage_backend: Optional[str] = None
|
storage_backend: Optional[str] = None
|
||||||
limit = 100
|
limit = 100
|
||||||
searched_backends: List[str] = []
|
searched_backends: List[str] = []
|
||||||
@@ -166,7 +154,7 @@ class Search_Store(Cmdlet):
|
|||||||
storage_backend = args_list[i + 1]
|
storage_backend = args_list[i + 1]
|
||||||
i += 2
|
i += 2
|
||||||
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
|
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
|
||||||
tag_filters.append(args_list[i + 1])
|
_tag_filters.append(args_list[i + 1])
|
||||||
i += 2
|
i += 2
|
||||||
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||||
try:
|
try:
|
||||||
@@ -175,7 +163,7 @@ class Search_Store(Cmdlet):
|
|||||||
limit = 100
|
limit = 100
|
||||||
i += 2
|
i += 2
|
||||||
elif low in {"-type", "--type"} and i + 1 < len(args_list):
|
elif low in {"-type", "--type"} and i + 1 < len(args_list):
|
||||||
type_filter = args_list[i + 1].lower()
|
_type_filter = args_list[i + 1].lower()
|
||||||
i += 2
|
i += 2
|
||||||
elif not arg.startswith("-"):
|
elif not arg.startswith("-"):
|
||||||
query = f"{query} {arg}".strip() if query else arg
|
query = f"{query} {arg}".strip() if query else arg
|
||||||
@@ -195,6 +183,8 @@ class Search_Store(Cmdlet):
|
|||||||
if store_filter and not storage_backend:
|
if store_filter and not storage_backend:
|
||||||
storage_backend = store_filter
|
storage_backend = store_filter
|
||||||
|
|
||||||
|
hash_query = self._parse_hash_query(query)
|
||||||
|
|
||||||
if not query:
|
if not query:
|
||||||
log("Provide a search query", file=sys.stderr)
|
log("Provide a search query", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -230,12 +220,136 @@ class Search_Store(Cmdlet):
|
|||||||
table_title += f" [{storage_backend}]"
|
table_title += f" [{storage_backend}]"
|
||||||
|
|
||||||
table = ResultTable(table_title)
|
table = ResultTable(table_title)
|
||||||
|
if hash_query:
|
||||||
|
try:
|
||||||
|
table.set_preserve_order(True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
from Store import Store
|
from Store import Store
|
||||||
storage = Store(config=config or {})
|
storage = Store(config=config or {})
|
||||||
from Store._base import Store as BaseStore
|
from Store._base import Store as BaseStore
|
||||||
|
|
||||||
backend_to_search = storage_backend or None
|
backend_to_search = storage_backend or None
|
||||||
|
if hash_query:
|
||||||
|
# Explicit hash list search: build rows from backend metadata.
|
||||||
|
backends_to_try: List[str] = []
|
||||||
|
if backend_to_search:
|
||||||
|
backends_to_try = [backend_to_search]
|
||||||
|
else:
|
||||||
|
backends_to_try = list(storage.list_backends())
|
||||||
|
|
||||||
|
found_any = False
|
||||||
|
for h in hash_query:
|
||||||
|
resolved_backend_name: Optional[str] = None
|
||||||
|
resolved_backend = None
|
||||||
|
|
||||||
|
for backend_name in backends_to_try:
|
||||||
|
try:
|
||||||
|
backend = storage[backend_name]
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
# If get_metadata works, consider it a hit; get_file can be optional (e.g. remote URL).
|
||||||
|
meta = backend.get_metadata(h)
|
||||||
|
if meta is None:
|
||||||
|
continue
|
||||||
|
resolved_backend_name = backend_name
|
||||||
|
resolved_backend = backend
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if resolved_backend_name is None or resolved_backend is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
found_any = True
|
||||||
|
searched_backends.append(resolved_backend_name)
|
||||||
|
|
||||||
|
# Resolve a path/URL string if possible
|
||||||
|
path_str: Optional[str] = None
|
||||||
|
try:
|
||||||
|
maybe_path = resolved_backend.get_file(h)
|
||||||
|
if isinstance(maybe_path, Path):
|
||||||
|
path_str = str(maybe_path)
|
||||||
|
elif isinstance(maybe_path, str) and maybe_path:
|
||||||
|
path_str = maybe_path
|
||||||
|
except Exception:
|
||||||
|
path_str = None
|
||||||
|
|
||||||
|
meta_obj: Dict[str, Any] = {}
|
||||||
|
try:
|
||||||
|
meta_obj = resolved_backend.get_metadata(h) or {}
|
||||||
|
except Exception:
|
||||||
|
meta_obj = {}
|
||||||
|
|
||||||
|
tags_list: List[str] = []
|
||||||
|
try:
|
||||||
|
tag_result = resolved_backend.get_tag(h)
|
||||||
|
if isinstance(tag_result, tuple) and tag_result:
|
||||||
|
maybe_tags = tag_result[0]
|
||||||
|
else:
|
||||||
|
maybe_tags = tag_result
|
||||||
|
if isinstance(maybe_tags, list):
|
||||||
|
tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
|
||||||
|
except Exception:
|
||||||
|
tags_list = []
|
||||||
|
|
||||||
|
title_from_tag: Optional[str] = None
|
||||||
|
try:
|
||||||
|
title_tag = first_title_tag(tags_list)
|
||||||
|
if title_tag and ":" in title_tag:
|
||||||
|
title_from_tag = title_tag.split(":", 1)[1].strip()
|
||||||
|
except Exception:
|
||||||
|
title_from_tag = None
|
||||||
|
|
||||||
|
title = title_from_tag or meta_obj.get("title") or meta_obj.get("name")
|
||||||
|
if not title and path_str:
|
||||||
|
try:
|
||||||
|
title = Path(path_str).stem
|
||||||
|
except Exception:
|
||||||
|
title = path_str
|
||||||
|
|
||||||
|
ext_val = meta_obj.get("ext") or meta_obj.get("extension")
|
||||||
|
if not ext_val and path_str:
|
||||||
|
try:
|
||||||
|
ext_val = Path(path_str).suffix
|
||||||
|
except Exception:
|
||||||
|
ext_val = None
|
||||||
|
|
||||||
|
size_bytes = meta_obj.get("size")
|
||||||
|
if size_bytes is None:
|
||||||
|
size_bytes = meta_obj.get("size_bytes")
|
||||||
|
try:
|
||||||
|
size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
|
||||||
|
except Exception:
|
||||||
|
size_bytes_int = None
|
||||||
|
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"title": str(title or h),
|
||||||
|
"hash": h,
|
||||||
|
"store": resolved_backend_name,
|
||||||
|
"path": path_str,
|
||||||
|
"ext": self._normalize_extension(ext_val),
|
||||||
|
"size_bytes": size_bytes_int,
|
||||||
|
"tag": tags_list,
|
||||||
|
}
|
||||||
|
|
||||||
|
table.add_result(payload)
|
||||||
|
results_list.append(payload)
|
||||||
|
ctx.emit(payload)
|
||||||
|
|
||||||
|
if found_any:
|
||||||
|
ctx.set_last_result_table(table, results_list)
|
||||||
|
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||||
|
db.update_worker_status(worker_id, 'completed')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
log("No results found", file=sys.stderr)
|
||||||
|
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
|
||||||
|
db.update_worker_status(worker_id, 'completed')
|
||||||
|
return 0
|
||||||
|
|
||||||
if backend_to_search:
|
if backend_to_search:
|
||||||
searched_backends.append(backend_to_search)
|
searched_backends.append(backend_to_search)
|
||||||
target_backend = storage[backend_to_search]
|
target_backend = storage[backend_to_search]
|
||||||
@@ -243,7 +357,9 @@ class Search_Store(Cmdlet):
|
|||||||
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
|
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
|
||||||
db.update_worker_status(worker_id, 'error')
|
db.update_worker_status(worker_id, 'error')
|
||||||
return 1
|
return 1
|
||||||
|
debug(f"[search-store] Searching '{backend_to_search}'")
|
||||||
results = target_backend.search(query, limit=limit)
|
results = target_backend.search(query, limit=limit)
|
||||||
|
debug(f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)")
|
||||||
else:
|
else:
|
||||||
from API.HydrusNetwork import is_hydrus_available
|
from API.HydrusNetwork import is_hydrus_available
|
||||||
hydrus_available = is_hydrus_available(config or {})
|
hydrus_available = is_hydrus_available(config or {})
|
||||||
@@ -257,7 +373,9 @@ class Search_Store(Cmdlet):
|
|||||||
continue
|
continue
|
||||||
searched_backends.append(backend_name)
|
searched_backends.append(backend_name)
|
||||||
|
|
||||||
|
debug(f"[search-store] Searching '{backend_name}'")
|
||||||
backend_results = backend.search(query, limit=limit - len(all_results))
|
backend_results = backend.search(query, limit=limit - len(all_results))
|
||||||
|
debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
|
||||||
if backend_results:
|
if backend_results:
|
||||||
all_results.extend(backend_results)
|
all_results.extend(backend_results)
|
||||||
if len(all_results) >= limit:
|
if len(all_results) >= limit:
|
||||||
@@ -317,11 +435,6 @@ class Search_Store(Cmdlet):
|
|||||||
results_list.append(normalized)
|
results_list.append(normalized)
|
||||||
ctx.emit(normalized)
|
ctx.emit(normalized)
|
||||||
|
|
||||||
# Debug: Verify table rows match items list
|
|
||||||
debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
|
|
||||||
if len(table.rows) != len(results_list):
|
|
||||||
debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)
|
|
||||||
|
|
||||||
ctx.set_last_result_table(table, results_list)
|
ctx.set_last_result_table(table, results_list)
|
||||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||||
else:
|
else:
|
||||||
|
|||||||
726
metadata.py
726
metadata.py
@@ -3,14 +3,12 @@ import re
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
import sqlite3
|
|
||||||
import requests
|
|
||||||
from SYS.logger import log, debug
|
from SYS.logger import log, debug
|
||||||
from urllib.parse import urlsplit, urlunsplit, unquote
|
from urllib.parse import urlsplit, urlunsplit, unquote
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||||
from models import PipeObject, FileRelationshipTracker, _get_file_hash
|
from models import FileRelationshipTracker
|
||||||
try:
|
try:
|
||||||
import musicbrainzngs # type: ignore
|
import musicbrainzngs # type: ignore
|
||||||
except ImportError: # pragma: no cover
|
except ImportError: # pragma: no cover
|
||||||
@@ -332,6 +330,112 @@ def _generate_hydrus_url_variants(url: str) -> List[str]:
|
|||||||
return variants
|
return variants
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_urls(value: Any) -> List[str]:
|
||||||
|
"""Normalize a URL field into a stable, deduplicated list.
|
||||||
|
|
||||||
|
Accepts:
|
||||||
|
- None
|
||||||
|
- a single URL string (optionally containing multiple URLs)
|
||||||
|
- a list/tuple/set of URL strings
|
||||||
|
|
||||||
|
This helper is used by cmdlets/stores/pipeline to keep `url` consistent.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _iter_raw_urls(raw: Any) -> Iterable[str]:
|
||||||
|
if raw is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
if isinstance(raw, str):
|
||||||
|
text = raw.strip()
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
# Support legacy prefixes like "url:https://...".
|
||||||
|
if text.lower().startswith("url:"):
|
||||||
|
text = text.split(":", 1)[1].strip()
|
||||||
|
|
||||||
|
# Prefer extracting obvious URLs to avoid splitting inside query strings.
|
||||||
|
matches = re.findall(r"https?://[^\s,]+", text, flags=re.IGNORECASE)
|
||||||
|
if matches:
|
||||||
|
for m in matches:
|
||||||
|
yield m
|
||||||
|
return
|
||||||
|
|
||||||
|
# Fallback: split on commas/whitespace.
|
||||||
|
for token in text.replace("\n", " ").replace("\r", " ").replace(",", " ").split():
|
||||||
|
if token:
|
||||||
|
yield token
|
||||||
|
return
|
||||||
|
|
||||||
|
if isinstance(raw, (list, tuple, set)):
|
||||||
|
for item in raw:
|
||||||
|
if item is None:
|
||||||
|
continue
|
||||||
|
if isinstance(item, str):
|
||||||
|
if item.strip():
|
||||||
|
yield item
|
||||||
|
else:
|
||||||
|
text = str(item).strip()
|
||||||
|
if text:
|
||||||
|
yield text
|
||||||
|
return
|
||||||
|
|
||||||
|
# Last resort: string-coerce.
|
||||||
|
text = str(raw).strip()
|
||||||
|
if text:
|
||||||
|
yield text
|
||||||
|
|
||||||
|
def _canonicalize(url_text: str) -> Optional[str]:
|
||||||
|
u = str(url_text or "").strip()
|
||||||
|
if not u:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Trim common wrappers and trailing punctuation.
|
||||||
|
u = u.strip("<>\"' ")
|
||||||
|
u = u.rstrip(")].,;\"")
|
||||||
|
if not u:
|
||||||
|
return None
|
||||||
|
|
||||||
|
lower = u.lower()
|
||||||
|
if not (lower.startswith("http://") or lower.startswith("https://")):
|
||||||
|
return u
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlsplit(u)
|
||||||
|
except Exception:
|
||||||
|
return u
|
||||||
|
|
||||||
|
scheme = (parsed.scheme or "").lower()
|
||||||
|
netloc = (parsed.netloc or "").lower()
|
||||||
|
path = unquote(parsed.path or "")
|
||||||
|
query = parsed.query or ""
|
||||||
|
|
||||||
|
# Normalize default ports.
|
||||||
|
if scheme == "http" and netloc.endswith(":80"):
|
||||||
|
netloc = netloc[:-3]
|
||||||
|
elif scheme == "https" and netloc.endswith(":443"):
|
||||||
|
netloc = netloc[:-4]
|
||||||
|
|
||||||
|
# Prefer no trailing slash except root.
|
||||||
|
if path and path != "/":
|
||||||
|
path = path.rstrip("/")
|
||||||
|
|
||||||
|
# Fragments are not part of the resource.
|
||||||
|
return urlunsplit((scheme, netloc, path, query, ""))
|
||||||
|
|
||||||
|
seen: Set[str] = set()
|
||||||
|
out: List[str] = []
|
||||||
|
for raw_url in _iter_raw_urls(value):
|
||||||
|
canonical = _canonicalize(raw_url)
|
||||||
|
if not canonical:
|
||||||
|
continue
|
||||||
|
if canonical in seen:
|
||||||
|
continue
|
||||||
|
seen.add(canonical)
|
||||||
|
out.append(canonical)
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def value_normalize(value: str) -> str:
|
def value_normalize(value: str) -> str:
|
||||||
"""Normalize whitespace: collapse internal spaces, strip, remove newlines."""
|
"""Normalize whitespace: collapse internal spaces, strip, remove newlines."""
|
||||||
value = value.replace("\n", " ").replace("\r", " ")
|
value = value.replace("\n", " ").replace("\r", " ")
|
||||||
@@ -358,6 +462,7 @@ def import_pending_sidecars(db_root: Path, db: Any) -> None:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Ensure file entry exists
|
# Ensure file entry exists
|
||||||
|
file_id: Optional[int] = None
|
||||||
try:
|
try:
|
||||||
cursor = db.connection.cursor() if db.connection else None
|
cursor = db.connection.cursor() if db.connection else None
|
||||||
if cursor:
|
if cursor:
|
||||||
@@ -394,10 +499,16 @@ def import_pending_sidecars(db_root: Path, db: Any) -> None:
|
|||||||
try:
|
try:
|
||||||
cursor = db.connection.cursor() if db.connection else None
|
cursor = db.connection.cursor() if db.connection else None
|
||||||
if cursor:
|
if cursor:
|
||||||
|
file_hash_value: Optional[str] = None
|
||||||
|
if hasattr(db, 'get_file_hash'):
|
||||||
|
try:
|
||||||
|
file_hash_value = db.get_file_hash(file_id)
|
||||||
|
except Exception:
|
||||||
|
file_hash_value = None
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)',
|
'INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)',
|
||||||
(file_hash_value, tag) if hasattr(db, 'get_file_hash') else (None, tag)
|
(file_hash_value, tag)
|
||||||
)
|
)
|
||||||
db.connection.commit()
|
db.connection.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -663,128 +774,6 @@ def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]:
|
|||||||
return {"source": "musicbrainz", "id": mbid, "tag": tags, "entity": entity}
|
return {"source": "musicbrainz", "id": mbid, "tag": tags, "entity": entity}
|
||||||
|
|
||||||
|
|
||||||
def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
|
|
||||||
"""Fetch metadata tags from OpenLibrary.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ol_id: OpenLibrary ID (e.g., 'OL123456M' for a book)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with 'tag' key containing list of extracted tags
|
|
||||||
"""
|
|
||||||
import urllib.request
|
|
||||||
|
|
||||||
# Normalize OL ID
|
|
||||||
ol_id = ol_id.strip().upper()
|
|
||||||
if not ol_id.startswith('OL'):
|
|
||||||
ol_id = f'OL{ol_id}'
|
|
||||||
|
|
||||||
# Fetch from OpenLibrary API
|
|
||||||
url = f"https://openlibrary.org/books/{ol_id}.json"
|
|
||||||
tags: List[str] = []
|
|
||||||
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(url, timeout=10) as response:
|
|
||||||
data = json.loads(response.read().decode('utf-8'))
|
|
||||||
except Exception as e:
|
|
||||||
raise ValueError(f"Failed to fetch OpenLibrary data for {ol_id}: {e}")
|
|
||||||
|
|
||||||
# Add OpenLibrary ID tag
|
|
||||||
_add_tag(tags, "openlibrary", ol_id)
|
|
||||||
|
|
||||||
# Extract title
|
|
||||||
_add_tag(tags, "title", data.get("title"))
|
|
||||||
|
|
||||||
# Extract subtitle if present
|
|
||||||
if data.get("subtitle"):
|
|
||||||
_add_tag(tags, "subtitle", data["subtitle"])
|
|
||||||
|
|
||||||
# Extract authors
|
|
||||||
authors = data.get("authors", [])
|
|
||||||
author_names: List[str] = []
|
|
||||||
for author in authors:
|
|
||||||
if isinstance(author, dict):
|
|
||||||
name = author.get("name")
|
|
||||||
else:
|
|
||||||
name = str(author)
|
|
||||||
if name:
|
|
||||||
author_names.append(name)
|
|
||||||
if author_names:
|
|
||||||
_extend_tags(tags, "author", author_names)
|
|
||||||
|
|
||||||
# Extract publication details
|
|
||||||
if data.get("publish_date"):
|
|
||||||
_add_tag(tags, "publish_date", data["publish_date"])
|
|
||||||
# Extract year if present
|
|
||||||
year_match = re.search(r'\b(\d{4})\b', str(data.get("publish_date", "")))
|
|
||||||
if year_match:
|
|
||||||
_add_tag(tags, "year", year_match.group(1))
|
|
||||||
|
|
||||||
# Extract publishers
|
|
||||||
publishers = data.get("publishers", [])
|
|
||||||
if publishers:
|
|
||||||
publisher_names = []
|
|
||||||
for pub in publishers:
|
|
||||||
if isinstance(pub, dict):
|
|
||||||
name = pub.get("name")
|
|
||||||
else:
|
|
||||||
name = str(pub)
|
|
||||||
if name:
|
|
||||||
publisher_names.append(name)
|
|
||||||
if publisher_names:
|
|
||||||
_extend_tags(tags, "publisher", publisher_names)
|
|
||||||
|
|
||||||
# Extract languages
|
|
||||||
languages = data.get("languages", [])
|
|
||||||
if languages:
|
|
||||||
lang_codes = []
|
|
||||||
for lang in languages:
|
|
||||||
if isinstance(lang, dict):
|
|
||||||
code = lang.get("key", "").split("/")[-1]
|
|
||||||
else:
|
|
||||||
code = str(lang).split("/")[-1]
|
|
||||||
if code and code != "":
|
|
||||||
lang_codes.append(code)
|
|
||||||
if lang_codes:
|
|
||||||
_extend_tags(tags, "language", lang_codes)
|
|
||||||
|
|
||||||
# Extract ISBN
|
|
||||||
isbns = data.get("isbn_10", []) + data.get("isbn_13", [])
|
|
||||||
if isbns:
|
|
||||||
for isbn in isbns[:1]: # Just take first one
|
|
||||||
if len(str(isbn)) == 10:
|
|
||||||
_add_tag(tags, "isbn_10", isbn)
|
|
||||||
elif len(str(isbn)) == 13:
|
|
||||||
_add_tag(tags, "isbn_13", isbn)
|
|
||||||
|
|
||||||
# Extract page count
|
|
||||||
_add_tag(tags, "pages", data.get("number_of_pages"))
|
|
||||||
|
|
||||||
# Extract genres/subjects (OpenLibrary calls them subjects)
|
|
||||||
# Subjects are added as plain freeform tags (no namespace prefix)
|
|
||||||
subjects = data.get("subjects", [])
|
|
||||||
if subjects:
|
|
||||||
for subject in subjects[:10]: # Limit to 10 subjects
|
|
||||||
if isinstance(subject, dict):
|
|
||||||
name = subject.get("name")
|
|
||||||
else:
|
|
||||||
name = str(subject)
|
|
||||||
if name:
|
|
||||||
# Add subject as plain tag without "subject:" prefix
|
|
||||||
normalized = value_normalize(str(name))
|
|
||||||
if normalized:
|
|
||||||
tags.append(normalized)
|
|
||||||
|
|
||||||
# Extract OpenLibrary description
|
|
||||||
description = data.get("description")
|
|
||||||
if description:
|
|
||||||
if isinstance(description, dict):
|
|
||||||
description = description.get("value")
|
|
||||||
_add_tag(tags, "summary", description)
|
|
||||||
|
|
||||||
return {"source": "openlibrary", "id": ol_id, "tag": tags}
|
|
||||||
|
|
||||||
|
|
||||||
def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None:
|
def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None:
|
||||||
"""Append a single value if not already in seen set (deduplication)."""
|
"""Append a single value if not already in seen set (deduplication)."""
|
||||||
if value is None:
|
if value is None:
|
||||||
@@ -1545,7 +1534,7 @@ def _derive_sidecar_path(media_path: Path) -> Path:
|
|||||||
return preferred
|
return preferred
|
||||||
|
|
||||||
|
|
||||||
def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
|
def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]: # pyright: ignore[reportUnusedFunction]
|
||||||
"""Read hash, tags, and url from sidecar file.
|
"""Read hash, tags, and url from sidecar file.
|
||||||
|
|
||||||
Consolidated with read_tags_from_file - this extracts extra metadata (hash, url).
|
Consolidated with read_tags_from_file - this extracts extra metadata (hash, url).
|
||||||
@@ -1559,7 +1548,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
|
|||||||
|
|
||||||
hash_value: Optional[str] = None
|
hash_value: Optional[str] = None
|
||||||
tags: List[str] = []
|
tags: List[str] = []
|
||||||
url: List[str] = []
|
urls: List[str] = []
|
||||||
|
|
||||||
for raw_line in raw.splitlines():
|
for raw_line in raw.splitlines():
|
||||||
line = raw_line.strip()
|
line = raw_line.strip()
|
||||||
@@ -1574,15 +1563,15 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
|
|||||||
url_part = line.split(':', 1)[1].strip() if ':' in line else ''
|
url_part = line.split(':', 1)[1].strip() if ':' in line else ''
|
||||||
if url_part:
|
if url_part:
|
||||||
for url_segment in url_part.split(','):
|
for url_segment in url_part.split(','):
|
||||||
for url in url_segment.split():
|
for url_token in url_segment.split():
|
||||||
url_clean = url.strip()
|
url_clean = url_token.strip()
|
||||||
if url_clean and url_clean not in url:
|
if url_clean and url_clean not in urls:
|
||||||
url.append(url_clean)
|
urls.append(url_clean)
|
||||||
else:
|
else:
|
||||||
# Everything else is a tag (including relationship: lines)
|
# Everything else is a tag (including relationship: lines)
|
||||||
tags.append(line)
|
tags.append(line)
|
||||||
|
|
||||||
return hash_value, tags, url
|
return hash_value, tags, urls
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1827,63 +1816,6 @@ def apply_title_to_path(media_path: Path, tags: Iterable[str]) -> Path:
|
|||||||
return destination
|
return destination
|
||||||
|
|
||||||
|
|
||||||
def _collect_search_roots(payload: Dict[str, Any]) -> List[Path]:
|
|
||||||
roots: List[Path] = []
|
|
||||||
for key in ('paths', 'search_paths', 'roots', 'directories'):
|
|
||||||
raw = payload.get(key)
|
|
||||||
if not raw:
|
|
||||||
continue
|
|
||||||
entries = raw if isinstance(raw, (list, tuple, set)) else [raw]
|
|
||||||
for entry in entries:
|
|
||||||
if not entry:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
candidate = Path(str(entry)).expanduser()
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
roots.append(candidate)
|
|
||||||
if load_config is not None and resolve_output_dir is not None:
|
|
||||||
try:
|
|
||||||
config = load_config()
|
|
||||||
except Exception:
|
|
||||||
config = None
|
|
||||||
if isinstance(config, dict) and config:
|
|
||||||
try:
|
|
||||||
default_root = resolve_output_dir(config)
|
|
||||||
except Exception:
|
|
||||||
default_root = None
|
|
||||||
if default_root is not None:
|
|
||||||
roots.append(default_root)
|
|
||||||
return roots
|
|
||||||
|
|
||||||
|
|
||||||
def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[Path]:
|
|
||||||
target = f'hash:{hash_value.strip().lower()}'
|
|
||||||
for root in roots:
|
|
||||||
try:
|
|
||||||
root_path = root.expanduser()
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
if not root_path.exists() or not root_path.is_dir():
|
|
||||||
continue
|
|
||||||
for pattern in ('*.tag',):
|
|
||||||
try:
|
|
||||||
iterator = root_path.rglob(pattern)
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
for candidate in iterator:
|
|
||||||
if not candidate.is_file():
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
with candidate.open('r', encoding='utf-8', errors='ignore') as handle:
|
|
||||||
for line in handle:
|
|
||||||
if line.strip().lower() == target:
|
|
||||||
return candidate
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]:
|
def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
path_value = payload.get('path')
|
path_value = payload.get('path')
|
||||||
if not path_value:
|
if not path_value:
|
||||||
@@ -2506,8 +2438,8 @@ def write_tags_to_file(
|
|||||||
|
|
||||||
# Add known url if provided - each on separate line to prevent corruption
|
# Add known url if provided - each on separate line to prevent corruption
|
||||||
if url:
|
if url:
|
||||||
for url in url:
|
for url_item in url:
|
||||||
content_lines.append(f"url:{url}")
|
content_lines.append(f"url:{url_item}")
|
||||||
|
|
||||||
# Add tags
|
# Add tags
|
||||||
if tags:
|
if tags:
|
||||||
@@ -2642,10 +2574,10 @@ def detect_metadata_request(tag: str) -> Optional[Dict[str, str]]:
|
|||||||
def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
|
def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
tag = payload.get('tag')
|
tag = payload.get('tag')
|
||||||
if not isinstance(tag, str):
|
if not isinstance(tag, str):
|
||||||
return {'tags': []}
|
return {'tag': []}
|
||||||
trimmed = value_normalize(tag)
|
trimmed = value_normalize(tag)
|
||||||
if not trimmed:
|
if not trimmed:
|
||||||
return {'tags': []}
|
return {'tag': []}
|
||||||
request = detect_metadata_request(trimmed)
|
request = detect_metadata_request(trimmed)
|
||||||
tags: List[str] = []
|
tags: List[str] = []
|
||||||
seen: Set[str] = set()
|
seen: Set[str] = set()
|
||||||
@@ -2653,7 +2585,7 @@ def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
|
|||||||
_append_unique(tags, seen, request['base'])
|
_append_unique(tags, seen, request['base'])
|
||||||
else:
|
else:
|
||||||
_append_unique(tags, seen, trimmed)
|
_append_unique(tags, seen, trimmed)
|
||||||
return {'tags': tags}
|
return {'tag': tags}
|
||||||
try:
|
try:
|
||||||
if request['source'] == 'imdb':
|
if request['source'] == 'imdb':
|
||||||
data = imdb_tag(request['id'])
|
data = imdb_tag(request['id'])
|
||||||
@@ -2662,8 +2594,15 @@ def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
|
|||||||
except Exception as exc: # pragma: no cover - network/service errors
|
except Exception as exc: # pragma: no cover - network/service errors
|
||||||
return {'tag': tags, 'error': str(exc)}
|
return {'tag': tags, 'error': str(exc)}
|
||||||
# Add tags from fetched data (no namespace, just unique append)
|
# Add tags from fetched data (no namespace, just unique append)
|
||||||
for tag in (data.get('tag') or []):
|
raw_tags = data.get('tag') if isinstance(data, dict) else None
|
||||||
_append_unique(tags, seen, tag)
|
if isinstance(raw_tags, str):
|
||||||
|
tag_iter: Iterable[str] = [raw_tags]
|
||||||
|
elif isinstance(raw_tags, (list, tuple, set)):
|
||||||
|
tag_iter = [t for t in raw_tags if isinstance(t, str)]
|
||||||
|
else:
|
||||||
|
tag_iter = []
|
||||||
|
for tag_value in tag_iter:
|
||||||
|
_append_unique(tags, seen, tag_value)
|
||||||
result = {
|
result = {
|
||||||
'tag': tags,
|
'tag': tags,
|
||||||
'source': request['source'],
|
'source': request['source'],
|
||||||
@@ -3082,14 +3021,14 @@ def expand_tag_lists(tags_set: Set[str]) -> Set[str]:
|
|||||||
# Load adjective.json from workspace root
|
# Load adjective.json from workspace root
|
||||||
adjective_path = Path(__file__).parent / "adjective.json"
|
adjective_path = Path(__file__).parent / "adjective.json"
|
||||||
if not adjective_path.exists():
|
if not adjective_path.exists():
|
||||||
log.debug(f"adjective.json not found at {adjective_path}")
|
debug(f"adjective.json not found at {adjective_path}")
|
||||||
return tags_set
|
return tags_set
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(adjective_path, 'r') as f:
|
with open(adjective_path, 'r') as f:
|
||||||
adjective_lists = json.load(f)
|
adjective_lists = json.load(f)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"Error loading adjective.json: {e}")
|
debug(f"Error loading adjective.json: {e}")
|
||||||
return tags_set
|
return tags_set
|
||||||
|
|
||||||
expanded_tags = set()
|
expanded_tags = set()
|
||||||
@@ -3108,10 +3047,10 @@ def expand_tag_lists(tags_set: Set[str]) -> Set[str]:
|
|||||||
if matched_list:
|
if matched_list:
|
||||||
# Add all tags from the list
|
# Add all tags from the list
|
||||||
expanded_tags.update(matched_list)
|
expanded_tags.update(matched_list)
|
||||||
log.info(f"Expanded {tag} to {len(matched_list)} tags")
|
debug(f"Expanded {tag} to {len(matched_list)} tags")
|
||||||
else:
|
else:
|
||||||
# List not found, log warning but don't add the reference
|
# List not found, log warning but don't add the reference
|
||||||
log.warning(f"Tag list '{list_name}' not found in adjective.json")
|
debug(f"Tag list '{list_name}' not found in adjective.json")
|
||||||
else:
|
else:
|
||||||
# Regular tag, keep as is
|
# Regular tag, keep as is
|
||||||
expanded_tags.add(tag)
|
expanded_tags.add(tag)
|
||||||
@@ -3194,98 +3133,6 @@ def build_book_tags(
|
|||||||
return deduped
|
return deduped
|
||||||
|
|
||||||
|
|
||||||
def fetch_openlibrary_metadata_tags(isbn: Optional[str] = None, olid: Optional[str] = None) -> List[str]:
|
|
||||||
"""Fetch book metadata from OpenLibrary and return as tags.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
isbn: ISBN number (with or without isbn: prefix)
|
|
||||||
olid: OpenLibrary ID
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of tags extracted from OpenLibrary metadata
|
|
||||||
"""
|
|
||||||
metadata_tags = []
|
|
||||||
|
|
||||||
# Try OLID first (preferred), then ISBN
|
|
||||||
url = None
|
|
||||||
|
|
||||||
if olid:
|
|
||||||
# Clean up OLID format
|
|
||||||
olid_clean = str(olid).replace('OL', '').replace('M', '').replace('W', '')
|
|
||||||
if olid_clean.isdigit():
|
|
||||||
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
|
|
||||||
else:
|
|
||||||
url = f"https://openlibrary.org/books/{olid}.json"
|
|
||||||
elif isbn:
|
|
||||||
# Clean up ISBN
|
|
||||||
isbn_clean = str(isbn).replace('isbn:', '').strip()
|
|
||||||
url = f"https://openlibrary.org/isbn/{isbn_clean}.json"
|
|
||||||
|
|
||||||
if not url:
|
|
||||||
return metadata_tags
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.get(url, timeout=10)
|
|
||||||
if response.status_code != 200:
|
|
||||||
return metadata_tags
|
|
||||||
|
|
||||||
data = response.json()
|
|
||||||
if not data:
|
|
||||||
return metadata_tags
|
|
||||||
|
|
||||||
# Extract title
|
|
||||||
if 'title' in data:
|
|
||||||
metadata_tags.append(f"title:{data['title']}")
|
|
||||||
|
|
||||||
# Extract authors
|
|
||||||
if 'authors' in data and isinstance(data['authors'], list):
|
|
||||||
for author in data['authors'][:3]:
|
|
||||||
if isinstance(author, dict) and 'name' in author:
|
|
||||||
metadata_tags.append(f"author:{author['name']}")
|
|
||||||
elif isinstance(author, str):
|
|
||||||
metadata_tags.append(f"author:{author}")
|
|
||||||
|
|
||||||
# Extract publish date
|
|
||||||
if 'publish_date' in data:
|
|
||||||
metadata_tags.append(f"publish_date:{data['publish_date']}")
|
|
||||||
|
|
||||||
# Extract publishers
|
|
||||||
if 'publishers' in data and isinstance(data['publishers'], list):
|
|
||||||
for pub in data['publishers'][:1]:
|
|
||||||
if isinstance(pub, dict) and 'name' in pub:
|
|
||||||
metadata_tags.append(f"publisher:{pub['name']}")
|
|
||||||
elif isinstance(pub, str):
|
|
||||||
metadata_tags.append(f"publisher:{pub}")
|
|
||||||
|
|
||||||
# Extract number of pages
|
|
||||||
if 'number_of_pages' in data:
|
|
||||||
page_count = data['number_of_pages']
|
|
||||||
if page_count and isinstance(page_count, int) and page_count > 0:
|
|
||||||
metadata_tags.append(f"pages:{page_count}")
|
|
||||||
|
|
||||||
# Extract language
|
|
||||||
if 'languages' in data and isinstance(data['languages'], list) and data['languages']:
|
|
||||||
lang = data['languages'][0]
|
|
||||||
if isinstance(lang, dict) and 'key' in lang:
|
|
||||||
lang_code = lang['key'].split('/')[-1]
|
|
||||||
metadata_tags.append(f"language:{lang_code}")
|
|
||||||
elif isinstance(lang, str):
|
|
||||||
metadata_tags.append(f"language:{lang}")
|
|
||||||
|
|
||||||
# Extract subjects as freeform tags (limit to 5)
|
|
||||||
if 'subjects' in data and isinstance(data['subjects'], list):
|
|
||||||
for subject in data['subjects'][:5]:
|
|
||||||
if subject and isinstance(subject, str):
|
|
||||||
subject_clean = str(subject).strip()
|
|
||||||
if subject_clean:
|
|
||||||
metadata_tags.append(subject_clean)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
debug(f"⚠ Failed to fetch OpenLibrary metadata: {e}")
|
|
||||||
|
|
||||||
return metadata_tags
|
|
||||||
|
|
||||||
|
|
||||||
def enrich_playlist_entries(entries: list, extractor: str) -> list:
|
def enrich_playlist_entries(entries: list, extractor: str) -> list:
|
||||||
"""Enrich playlist entries with full metadata by fetching individual entry info.
|
"""Enrich playlist entries with full metadata by fetching individual entry info.
|
||||||
|
|
||||||
@@ -3312,7 +3159,7 @@ def enrich_playlist_entries(entries: list, extractor: str) -> list:
|
|||||||
if entry_url and is_url_supported_by_ytdlp(entry_url):
|
if entry_url and is_url_supported_by_ytdlp(entry_url):
|
||||||
try:
|
try:
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
ydl_opts = {
|
ydl_opts: Any = {
|
||||||
"quiet": True,
|
"quiet": True,
|
||||||
"no_warnings": True,
|
"no_warnings": True,
|
||||||
"skip_download": True,
|
"skip_download": True,
|
||||||
@@ -3690,294 +3537,3 @@ def extract_url_formats(formats: list) -> List[Tuple[str, str]]:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def scrape_isbn_metadata(isbn: str) -> List[str]:
    """Scrape metadata for an ISBN using the Open Library books API.

    Queries https://openlibrary.org/api/books with the hyphen-stripped ISBN
    (jscmd=data) and converts the first returned record into namespaced tags:
    title:, author:, publisher:, publish_date:, description:, pages:, plus
    external identifiers (openlibrary:, lccn:, oclc:, goodreads:,
    librarything:, doi:, internet_archive:).

    Returns an empty list on any network/parse failure or when no record is
    found; never raises.
    """
    new_tags: List[str] = []
    try:
        from API.HTTP import HTTPClient
        import json as json_module

        isbn_clean = isbn.replace('-', '').strip()
        url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode('utf-8'))
        except Exception as e:
            log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr)
            return []

        if not data:
            log(f"No ISBN metadata found for: {isbn}")
            return []

        # The API keys the payload by "ISBN:<n>"; we asked for a single book,
        # so take the first (and only) record.
        book_data = next(iter(data.values()), None)
        if not book_data:
            return []

        if 'title' in book_data:
            new_tags.append(f"title:{book_data['title']}")

        # Authors/publishers may be dicts ({'name': ...}) or plain strings;
        # guard with isinstance, consistent with scrape_openlibrary_metadata.
        # (Previously `'name' in author` on a str did a substring test and
        # `author['name']` would then raise TypeError.)
        if isinstance(book_data.get('authors'), list):
            for author in book_data['authors'][:3]:
                if isinstance(author, dict) and 'name' in author:
                    new_tags.append(f"author:{author['name']}")
                elif isinstance(author, str):
                    new_tags.append(f"author:{author}")

        if 'publish_date' in book_data:
            new_tags.append(f"publish_date:{book_data['publish_date']}")

        if isinstance(book_data.get('publishers'), list):
            for pub in book_data['publishers'][:1]:
                if isinstance(pub, dict) and 'name' in pub:
                    new_tags.append(f"publisher:{pub['name']}")
                elif isinstance(pub, str):
                    new_tags.append(f"publisher:{pub}")

        if 'description' in book_data:
            desc = book_data['description']
            # Descriptions are sometimes wrapped as {'type': ..., 'value': ...}.
            if isinstance(desc, dict) and 'value' in desc:
                desc = desc['value']
            if desc:
                desc_str = str(desc).strip()
                # Include description if available (limit to 200 chars to keep it manageable)
                if len(desc_str) > 0:
                    new_tags.append(f"description:{desc_str[:200]}")

        if 'number_of_pages' in book_data:
            page_count = book_data['number_of_pages']
            if page_count and isinstance(page_count, int) and page_count > 0:
                new_tags.append(f"pages:{page_count}")

        if isinstance(book_data.get('identifiers'), dict):
            identifiers = book_data['identifiers']

            def _id_tags(source_key: str, namespace: str) -> None:
                # Identifier values may be a list (take the first) or a bare string.
                value = identifiers.get(source_key)
                if isinstance(value, list) and value:
                    new_tags.append(f"{namespace}:{value[0]}")
                elif isinstance(value, str):
                    new_tags.append(f"{namespace}:{value}")

            # Map Open Library identifier keys onto our tag namespaces.
            for source_key, namespace in (
                ('openlibrary', 'openlibrary'),
                ('lccn', 'lccn'),
                ('oclc', 'oclc'),
                ('goodreads', 'goodreads'),
                ('librarything', 'librarything'),
                ('doi', 'doi'),
                ('internet_archive', 'internet_archive'),
            ):
                _id_tags(source_key, namespace)

        log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
        return new_tags
    except Exception as e:
        log(f"ISBN scraping error: {e}", file=sys.stderr)
        return []
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_openlibrary_metadata(olid: str) -> List[str]:
    """Scrape metadata for an OpenLibrary ID using the .json API endpoint.

    Fetches from https://openlibrary.org/books/{OLID}.json and extracts:
    - Title, authors, publish date, publishers
    - Description
    - Subjects as freeform tags (without namespace prefix)
    - Identifiers (ISBN, LCCN, OCLC, etc.)

    Returns an empty list on any failure; never raises.
    """
    new_tags: List[str] = []
    try:
        from API.HTTP import HTTPClient
        import json as json_module

        # Format: OL9674499M or just 9674499M
        olid_clean = olid.replace('OL', '').replace('M', '')
        if not olid_clean.isdigit():
            olid_clean = olid

        # Ensure we have the full OLID format for the URL
        if not olid.startswith('OL'):
            url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
        else:
            url = f"https://openlibrary.org/books/{olid}.json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode('utf-8'))
        except Exception as e:
            log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr)
            return []

        if not data:
            log(f"No OpenLibrary metadata found for: {olid}")
            return []

        # Add title
        if 'title' in data:
            new_tags.append(f"title:{data['title']}")

        # Add authors (entries may be dicts with 'name' or plain strings)
        if isinstance(data.get('authors'), list):
            for author in data['authors'][:3]:
                if isinstance(author, dict) and 'name' in author:
                    new_tags.append(f"author:{author['name']}")
                elif isinstance(author, str):
                    new_tags.append(f"author:{author}")

        # Add publish date
        if 'publish_date' in data:
            new_tags.append(f"publish_date:{data['publish_date']}")

        # Add publishers
        if isinstance(data.get('publishers'), list):
            for pub in data['publishers'][:1]:
                if isinstance(pub, dict) and 'name' in pub:
                    new_tags.append(f"publisher:{pub['name']}")
                elif isinstance(pub, str):
                    new_tags.append(f"publisher:{pub}")

        # Add description (sometimes wrapped as {'type': ..., 'value': ...})
        if 'description' in data:
            desc = data['description']
            if isinstance(desc, dict) and 'value' in desc:
                desc = desc['value']
            if desc:
                desc_str = str(desc).strip()
                if len(desc_str) > 0:
                    # Limit to 200 chars to keep the tag manageable.
                    new_tags.append(f"description:{desc_str[:200]}")

        # Add number of pages
        if 'number_of_pages' in data:
            page_count = data['number_of_pages']
            if page_count and isinstance(page_count, int) and page_count > 0:
                new_tags.append(f"pages:{page_count}")

        # Add subjects as FREEFORM tags (no namespace prefix), deduplicated
        if isinstance(data.get('subjects'), list):
            for subject in data['subjects'][:10]:
                if subject and isinstance(subject, str):
                    subject_clean = subject.strip()
                    if subject_clean and subject_clean not in new_tags:
                        new_tags.append(subject_clean)

        # Add identifiers
        if isinstance(data.get('identifiers'), dict):
            identifiers = data['identifiers']

            def _id_tags(source_key: str, namespace: str) -> None:
                # Identifier values may be a list (take the first) or a bare string.
                value = identifiers.get(source_key)
                if isinstance(value, list) and value:
                    new_tags.append(f"{namespace}:{value[0]}")
                elif isinstance(value, str):
                    new_tags.append(f"{namespace}:{value}")

            # Consolidates five previously duplicated extraction stanzas.
            for source_key, namespace in (
                ('isbn_10', 'isbn_10'),
                ('isbn_13', 'isbn_13'),
                ('lccn', 'lccn'),
                ('oclc_numbers', 'oclc'),
                ('goodreads', 'goodreads'),
            ):
                _id_tags(source_key, namespace)

        log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
        return new_tags
    except Exception as e:
        log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
        return []
|
|
||||||
|
|
||||||
|
|
||||||
def perform_metadata_scraping(tags_list: List[str]) -> List[str]:
    """Perform scraping based on identifiers in tags.

    Priority order:
    1. openlibrary: (preferred - more complete metadata)
    2. isbn_10 or isbn (fallback)

    Returns only tags not already present in *tags_list* (case-insensitive),
    with duplicates among the scraped tags removed as well.
    """
    identifiers = extract_scrapable_identifiers(tags_list)

    # Nothing to scrape from — bail out early.
    if not identifiers:
        log("No scrapable identifiers found (openlibrary, ISBN, musicbrainz, imdb)")
        return []

    log(f"Found scrapable identifiers: {', '.join(identifiers.keys())}")

    scraped: List[str] = []

    # OpenLibrary wins over ISBN when both are present (richer metadata).
    if 'openlibrary' in identifiers:
        olid = identifiers['openlibrary']
        if olid:
            log(f"Scraping OpenLibrary: {olid}")
            scraped.extend(scrape_openlibrary_metadata(olid))
    else:
        isbn_value = (
            identifiers.get('isbn_13')
            or identifiers.get('isbn_10')
            or identifiers.get('isbn')
        )
        if isbn_value:
            log(f"Scraping ISBN: {isbn_value}")
            scraped.extend(scrape_isbn_metadata(isbn_value))

    # Filter out tags the caller already has, plus duplicates within the
    # scraped batch itself — both compared case-insensitively.
    known_lower = {existing.lower() for existing in tags_list}
    unique_new: List[str] = []
    for candidate in scraped:
        folded = candidate.lower()
        if folded in known_lower:
            continue
        known_lower.add(folded)
        unique_new.append(candidate)

    if unique_new:
        log(f"Added {len(unique_new)} new tag(s) from scraping")

    return unique_new
|
|
||||||
|
|||||||
29
models.py
29
models.py
@@ -151,6 +151,35 @@ class PipeObject:
|
|||||||
key_display = key if len(key) <= 15 else key[:12] + "..."
|
key_display = key if len(key) <= 15 else key[:12] + "..."
|
||||||
debug(f"│ {key_display:<15}: {val_display:<42}│")
|
debug(f"│ {key_display:<15}: {val_display:<42}│")
|
||||||
|
|
||||||
|
# If we have structured provider metadata, expand it for debugging.
|
||||||
|
full_md = self.extra.get("full_metadata")
|
||||||
|
if isinstance(full_md, dict) and full_md:
|
||||||
|
debug("├─────────────────────────────────────────────────────────────┤")
|
||||||
|
debug("│ full_metadata: │")
|
||||||
|
for md_key in sorted(full_md.keys(), key=lambda x: str(x)):
|
||||||
|
md_val = full_md.get(md_key)
|
||||||
|
if isinstance(md_val, (str, int, float)) or md_val is None or isinstance(md_val, bool):
|
||||||
|
md_display = str(md_val)
|
||||||
|
elif isinstance(md_val, list):
|
||||||
|
if len(md_val) <= 6 and all(isinstance(x, (str, int, float, bool)) or x is None for x in md_val):
|
||||||
|
md_display = "[" + ", ".join(str(x) for x in md_val) + "]"
|
||||||
|
else:
|
||||||
|
md_display = f"list({len(md_val)})"
|
||||||
|
elif isinstance(md_val, dict):
|
||||||
|
# Avoid dumping huge nested dicts (like raw provider docs).
|
||||||
|
keys = list(md_val.keys())
|
||||||
|
preview = ",".join(str(k) for k in keys[:6])
|
||||||
|
md_display = f"dict({len(keys)})[{preview}{',...' if len(keys) > 6 else ''}]"
|
||||||
|
else:
|
||||||
|
md_str = str(md_val)
|
||||||
|
md_display = md_str if len(md_str) <= 40 else md_str[:37] + "..."
|
||||||
|
|
||||||
|
md_key_display = str(md_key)
|
||||||
|
md_key_display = md_key_display if len(md_key_display) <= 15 else md_key_display[:12] + "..."
|
||||||
|
if len(md_display) > 42:
|
||||||
|
md_display = md_display[:39] + "..."
|
||||||
|
debug(f"│ {md_key_display:<15}: {md_display:<42}│")
|
||||||
|
|
||||||
if self.action:
|
if self.action:
|
||||||
debug("├─────────────────────────────────────────────────────────────┤")
|
debug("├─────────────────────────────────────────────────────────────┤")
|
||||||
action_display = self.action[:48]
|
action_display = self.action[:48]
|
||||||
|
|||||||
10
pipeline.py
10
pipeline.py
@@ -575,6 +575,11 @@ def restore_previous_result_table() -> bool:
|
|||||||
_DISPLAY_ITEMS = []
|
_DISPLAY_ITEMS = []
|
||||||
_DISPLAY_TABLE = None
|
_DISPLAY_TABLE = None
|
||||||
_DISPLAY_SUBJECT = None
|
_DISPLAY_SUBJECT = None
|
||||||
|
# If an underlying table exists, we're done.
|
||||||
|
# Otherwise, fall through to history restore so @.. actually returns to the last table.
|
||||||
|
if _LAST_RESULT_TABLE is not None:
|
||||||
|
return True
|
||||||
|
if not _RESULT_TABLE_HISTORY:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if not _RESULT_TABLE_HISTORY:
|
if not _RESULT_TABLE_HISTORY:
|
||||||
@@ -613,6 +618,11 @@ def restore_next_result_table() -> bool:
|
|||||||
_DISPLAY_ITEMS = []
|
_DISPLAY_ITEMS = []
|
||||||
_DISPLAY_TABLE = None
|
_DISPLAY_TABLE = None
|
||||||
_DISPLAY_SUBJECT = None
|
_DISPLAY_SUBJECT = None
|
||||||
|
# If an underlying table exists, we're done.
|
||||||
|
# Otherwise, fall through to forward restore when available.
|
||||||
|
if _LAST_RESULT_TABLE is not None:
|
||||||
|
return True
|
||||||
|
if not _RESULT_TABLE_FORWARD:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if not _RESULT_TABLE_FORWARD:
|
if not _RESULT_TABLE_FORWARD:
|
||||||
|
|||||||
336
test-login.py
Normal file
336
test-login.py
Normal file
@@ -0,0 +1,336 @@
|
|||||||
|
import requests
|
||||||
|
import random, string
|
||||||
|
from concurrent import futures
|
||||||
|
from tqdm import tqdm
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
from Crypto.Cipher import AES
|
||||||
|
from Crypto.Util import Counter
|
||||||
|
|
||||||
|
def display_error(response, message):
    """Print *message*, the response object, and its body, then abort.

    Used as a fatal-error sink throughout the script: it never returns.
    """
    for part in (message, response, response.text):
        print(part)
    exit()
|
||||||
|
|
||||||
|
def get_book_infos(session, url):
    """Fetch title, page-image links and metadata for an archive.org book page.

    Scrapes the BookReader bootstrap JSON out of the details-page HTML, then
    calls the inner info endpoint it points at. Exits the process if fewer
    than two page links are found.

    NOTE(review): the string-splitting below is brittle — it assumes the
    first `"url":"..."` occurrence in the HTML is the BookReader info URL;
    an archive.org markup change would break it.
    """
    r = session.get(url).text
    # Extract the protocol-relative info URL and unescape '&' (\u0026).
    infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
    response = session.get(infos_url)
    data = response.json()['data']
    title = data['brOptions']['bookTitle'].strip().replace(" ", "_")
    title = ''.join( c for c in title if c not in '<>:"/\\|?*' ) # Filter forbidden chars in directory names (Windows & Linux)
    title = title[:150] # Trim the title to avoid long file names
    metadata = data['metadata']
    links = []
    # brOptions.data is a list of spreads, each a list of page dicts.
    for item in data['brOptions']['data']:
        for page in item:
            links.append(page['uri'])

    if len(links) > 1:
        print(f"[+] Found {len(links)} pages")
        return title, links, metadata
    else:
        print(f"[-] Error while getting image links")
        exit()
|
||||||
|
|
||||||
|
def login(email, password):
    """Authenticate against archive.org and return a logged-in session.

    Flow: GET the login endpoint to obtain a one-time token, then POST the
    credentials with that token. On any failure this exits the process via
    display_error (which never returns); on bad credentials it prints a
    message and exits directly.

    Returns the authenticated requests.Session on success.
    """
    session = requests.Session()
    response = session.get("https://archive.org/services/account/login/")
    login_data = response.json()
    if not login_data['success']:
        display_error(response, "[-] Error while getting login token:")

    login_token = login_data["value"]["token"]

    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {"username":email, "password":password, "t": login_token}

    response = session.post("https://archive.org/services/account/login/", headers=headers, data=json.dumps(data))
    try:
        response_json = response.json()
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
    # swallowed; any JSON decode failure still routes to display_error,
    # which exits, so response_json is always bound below.
    except Exception:
        display_error(response, "[-] Error while login:")

    if response_json["success"] == False:
        if response_json["value"] == "bad_login":
            print("[-] Invalid credentials!")
            exit()
        display_error(response, "[-] Error while login:")
    else:
        print("[+] Successful login")
        return session
|
||||||
|
|
||||||
|
def loan(session, book_id, verbose=True):
    """Borrow *book_id* on archive.org using the loan endpoints.

    Sequence: grant_access (search-inside), browse_book, then create_token.
    A 400 on browse_book whose JSON error says the book "is not available to
    borrow" is treated as "no borrow needed" and returns immediately; other
    failures exit the process via display_error (which never returns).

    Returns the session on success so callers can re-assign it.
    """
    data = {
        "action": "grant_access",
        "identifier": book_id
    }
    response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data)
    data['action'] = "browse_book"
    response = session.post("https://archive.org/services/loans/loan/", data=data)

    if response.status_code == 400 :
        try:
            # Exact-string match against the server's error message — fragile
            # but intentional: this specific text means the book is freely
            # readable and no loan is required.
            if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
                print("This book doesn't need to be borrowed")
                return session
            else :
                display_error(response, "Something went wrong when trying to borrow the book.")
        except: # The response is not in JSON format
            display_error(response, "The book cannot be borrowed")

    data['action'] = "create_token"
    response = session.post("https://archive.org/services/loans/loan/", data=data)

    # A token anywhere in the body indicates the loan was granted.
    if "token" in response.text:
        if verbose:
            print("[+] Successful loan")
        return session
    else:
        display_error(response, "Something went wrong when trying to borrow the book, maybe you can't borrow this book.")
|
||||||
|
|
||||||
|
def return_loan(session, book_id):
    """Return a previously borrowed archive.org book identified by *book_id*."""
    payload = {
        "action": "return_loan",
        "identifier": book_id
    }
    response = session.post("https://archive.org/services/loans/loan/", data=payload)
    # Only parse the JSON body after confirming a 200 (short-circuit).
    if response.status_code == 200 and response.json()["success"]:
        print("[+] Book returned")
    else:
        display_error(response, "Something went wrong when trying to return the book")
|
||||||
|
|
||||||
|
def image_name(pages, page, directory):
    """Build the path for page *page*, zero-padded to the width of *pages*
    so downloaded files sort lexicographically in page order."""
    width = len(str(pages))
    return f"{directory}/{str(page).zfill(width)}.jpg"
|
||||||
|
|
||||||
|
def deobfuscate_image(image_data, link, obf_header):
    """
    @Author: https://github.com/justimm
    Decrypts the first 1024 bytes of image_data using AES-CTR.
    The obfuscation_header is expected in the form "1|<base64encoded_counter>"
    where the base64-decoded counter is 16 bytes.
    We derive the AES key by taking the SHA-1 digest of the image URL (with protocol/host removed)
    and using the first 16 bytes.
    For AES-CTR, we use a 16-byte counter block. The first 8 bytes are used as a fixed prefix,
    and the remaining 8 bytes (interpreted as a big-endian integer) are used as the initial counter value.

    Raises ValueError on a malformed header, an unsupported version, or a
    counter that is not exactly 16 bytes.
    """
    try:
        version, counter_b64 = obf_header.split('|')
    except Exception as e:
        raise ValueError("Invalid X-Obfuscate header format") from e

    if version != '1':
        raise ValueError("Unsupported obfuscation version: " + version)

    # Derive AES key: replace protocol/host in link with '/'
    aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
    sha1_digest = hashlib.sha1(aesKey.encode('utf-8')).digest()
    key = sha1_digest[:16]

    # Decode the counter (should be 16 bytes)
    counter_bytes = base64.b64decode(counter_b64)
    if len(counter_bytes) != 16:
        raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")

    # Split the 16-byte counter block: fixed 8-byte prefix + 8-byte
    # big-endian initial counter value.
    prefix = counter_bytes[:8]
    initial_value = int.from_bytes(counter_bytes[8:], byteorder='big')

    # Create AES-CTR cipher with a 64-bit counter length.
    ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False)
    cipher = AES.new(key, AES.MODE_CTR, counter=ctr)

    # Only the first 1 KiB is obfuscated; the rest of the payload is
    # passed through untouched.
    decrypted_part = cipher.decrypt(image_data[:1024])
    new_data = decrypted_part + image_data[1024:]
    return new_data
|
||||||
|
|
||||||
|
def download_one_image(session, link, i, directory, book_id, pages):
    """Download page *i* of the book at *link* into *directory*.

    Retries forever until a 200 is received: a 403 triggers a silent
    re-borrow of the book (the loan expires during long downloads) before
    retrying. If the response carries an X-Obfuscate header, the payload is
    deobfuscated before being written.

    NOTE(review): status codes other than 200/403 also keep the loop
    spinning, but without the 1-second back-off (no exception is raised for
    them) — TODO confirm whether e.g. a persistent 500 should abort instead.
    """
    headers = {
        "Referer": "https://archive.org/",
        "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
        "Sec-Fetch-Site": "same-site",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Dest": "image",
    }
    retry = True
    response = None
    while retry:
        try:
            response = session.get(link, headers=headers)
            if response.status_code == 403:
                session = loan(session, book_id, verbose=False)
                raise Exception("Borrow again")
            elif response.status_code == 200:
                retry = False
        # Narrowed from a bare `except:` so Ctrl-C (KeyboardInterrupt) can
        # still break out of the retry loop.
        except Exception:
            time.sleep(1)  # Wait 1 second before retrying

    image = image_name(pages, i, directory)

    obf_header = response.headers.get("X-Obfuscate")
    image_content = None
    if obf_header:
        try:
            image_content = deobfuscate_image(response.content, link, obf_header)
        except Exception as e:
            print(f"[ERROR] Deobfuscation failed: {e}")
            return
    else:
        image_content = response.content

    with open(image, "wb") as f:
        f.write(image_content)
|
||||||
|
|
||||||
|
def download(session, n_threads, directory, links, scale, book_id):
    """Download all page images of a book concurrently.

    Appends the rotate/scale query parameters to every page link, fans the
    downloads out over a thread pool of *n_threads* workers, and waits for
    completion with a tqdm progress bar.

    Returns the ordered list of local image paths (one per page link).
    """
    print("Downloading pages...")
    links = [f"{link}&rotate=0&scale={scale}" for link in links]
    pages = len(links)

    tasks = []
    with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
        # enumerate instead of links.index(link): index() was O(n) per page
        # (O(n^2) total) and returned the FIRST match, so duplicate page
        # links would all map to the same index and overwrite one file
        # while leaving others missing.
        for i, link in enumerate(links):
            tasks.append(executor.submit(download_one_image, session=session, link=link, i=i, directory=directory, book_id=book_id, pages=pages))
        # Drain the futures just to drive the progress bar.
        for task in tqdm(futures.as_completed(tasks), total=len(tasks)):
            pass

    images = [image_name(pages, i, directory) for i in range(len(links))]
    return images
|
||||||
|
|
||||||
|
def make_pdf(pdf, title, directory):
    """Write the *pdf* bytes to <directory>/<title>.pdf without clobbering.

    If a file with that name already exists, "(1)", "(2)", ... is appended
    to the title until a free name is found.
    """
    filename = title + ".pdf"
    # Handle the case where multiple books with the same name are downloaded
    attempt = 1
    while os.path.isfile(os.path.join(directory, filename)):
        filename = f"{title}({attempt}).pdf"
        attempt += 1

    with open(os.path.join(directory, filename), "wb") as handle:
        handle.write(pdf)
    print(f"[+] PDF saved as \"{filename}\"")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
my_parser = argparse.ArgumentParser()
|
||||||
|
my_parser.add_argument('-e', '--email', help='Your archive.org email', type=str, required=True)
|
||||||
|
my_parser.add_argument('-p', '--password', help='Your archive.org password', type=str, required=True)
|
||||||
|
my_parser.add_argument('-u', '--url', help='Link to the book (https://archive.org/details/XXXX). You can use this argument several times to download multiple books', action='append', type=str)
|
||||||
|
my_parser.add_argument('-d', '--dir', help='Output directory', type=str)
|
||||||
|
my_parser.add_argument('-f', '--file', help='File where are stored the URLs of the books to download', type=str)
|
||||||
|
my_parser.add_argument('-r', '--resolution', help='Image resolution (10 to 0, 0 is the highest), [default 3]', type=int, default=3)
|
||||||
|
my_parser.add_argument('-t', '--threads', help="Maximum number of threads, [default 50]", type=int, default=50)
|
||||||
|
my_parser.add_argument('-j', '--jpg', help="Output to individual JPG's rather than a PDF", action='store_true')
|
||||||
|
my_parser.add_argument('-m', '--meta', help="Output the metadata of the book to a json file (-j option required)", action='store_true')
|
||||||
|
|
||||||
|
if len(sys.argv) == 1:
|
||||||
|
my_parser.print_help(sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
args = my_parser.parse_args()
|
||||||
|
|
||||||
|
if args.url is None and args.file is None:
|
||||||
|
my_parser.error("At least one of --url and --file required")
|
||||||
|
|
||||||
|
email = args.email
|
||||||
|
password = args.password
|
||||||
|
scale = args.resolution
|
||||||
|
n_threads = args.threads
|
||||||
|
d = args.dir
|
||||||
|
|
||||||
|
if d == None:
|
||||||
|
d = os.getcwd()
|
||||||
|
elif not os.path.isdir(d):
|
||||||
|
print(f"Output directory does not exist!")
|
||||||
|
exit()
|
||||||
|
|
||||||
|
if args.url is not None:
|
||||||
|
urls = args.url
|
||||||
|
else:
|
||||||
|
if os.path.exists(args.file):
|
||||||
|
with open(args.file) as f:
|
||||||
|
urls = f.read().strip().split("\n")
|
||||||
|
else:
|
||||||
|
print(f"{args.file} does not exist!")
|
||||||
|
exit()
|
||||||
|
|
||||||
|
# Check the urls format
|
||||||
|
for url in urls:
|
||||||
|
if not url.startswith("https://archive.org/details/"):
|
||||||
|
print(f"{url} --> Invalid url. URL must starts with \"https://archive.org/details/\"")
|
||||||
|
exit()
|
||||||
|
|
||||||
|
print(f"{len(urls)} Book(s) to download")
|
||||||
|
session = login(email, password)
|
||||||
|
|
||||||
|
for url in urls:
|
||||||
|
book_id = list(filter(None, url.split("/")))[3]
|
||||||
|
print("="*40)
|
||||||
|
print(f"Current book: https://archive.org/details/{book_id}")
|
||||||
|
session = loan(session, book_id)
|
||||||
|
title, links, metadata = get_book_infos(session, url)
|
||||||
|
|
||||||
|
directory = os.path.join(d, title)
|
||||||
|
# Handle the case where multiple books with the same name are downloaded
|
||||||
|
i = 1
|
||||||
|
_directory = directory
|
||||||
|
while os.path.isdir(directory):
|
||||||
|
directory = f"{_directory}({i})"
|
||||||
|
i += 1
|
||||||
|
os.makedirs(directory)
|
||||||
|
|
||||||
|
if args.meta:
|
||||||
|
print("Writing metadata.json...")
|
||||||
|
with open(f"{directory}/metadata.json",'w') as f:
|
||||||
|
json.dump(metadata,f)
|
||||||
|
|
||||||
|
images = download(session, n_threads, directory, links, scale, book_id)
|
||||||
|
|
||||||
|
if not args.jpg: # Create pdf with images and remove the images folder
|
||||||
|
import img2pdf
|
||||||
|
|
||||||
|
# prepare PDF metadata
|
||||||
|
# sometimes archive metadata is missing
|
||||||
|
pdfmeta = { }
|
||||||
|
# ensure metadata are str
|
||||||
|
for key in ["title", "creator", "associated-names"]:
|
||||||
|
if key in metadata:
|
||||||
|
if isinstance(metadata[key], str):
|
||||||
|
pass
|
||||||
|
elif isinstance(metadata[key], list):
|
||||||
|
metadata[key] = "; ".join(metadata[key])
|
||||||
|
else:
|
||||||
|
raise Exception("unsupported metadata type")
|
||||||
|
# title
|
||||||
|
if 'title' in metadata:
|
||||||
|
pdfmeta['title'] = metadata['title']
|
||||||
|
# author
|
||||||
|
if 'creator' in metadata and 'associated-names' in metadata:
|
||||||
|
pdfmeta['author'] = metadata['creator'] + "; " + metadata['associated-names']
|
||||||
|
elif 'creator' in metadata:
|
||||||
|
pdfmeta['author'] = metadata['creator']
|
||||||
|
elif 'associated-names' in metadata:
|
||||||
|
pdfmeta['author'] = metadata['associated-names']
|
||||||
|
# date
|
||||||
|
if 'date' in metadata:
|
||||||
|
try:
|
||||||
|
pdfmeta['creationdate'] = datetime.strptime(metadata['date'][0:4], '%Y')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
# keywords
|
||||||
|
pdfmeta['keywords'] = [f"https://archive.org/details/{book_id}"]
|
||||||
|
|
||||||
|
pdf = img2pdf.convert(images, **pdfmeta)
|
||||||
|
make_pdf(pdf, title, args.dir if args.dir != None else "")
|
||||||
|
try:
|
||||||
|
shutil.rmtree(directory)
|
||||||
|
except OSError as e:
|
||||||
|
print ("Error: %s - %s." % (e.filename, e.strerror))
|
||||||
|
|
||||||
|
return_loan(session, book_id)
|
||||||
Reference in New Issue
Block a user