AST

helper/__init__.py (new file, 92 lines)
@@ -0,0 +1,92 @@
"""Helper modules for the downlow mpv integration."""
|
||||
from . import hydrus as _hydrus
|
||||
from . import download as _download
|
||||
from . import tasks as _tasks
|
||||
from . import utils as _utils
|
||||
|
||||
try: # Optional dependency on Playwright
|
||||
from . import webshot as _webshot
|
||||
except Exception as exc: # pragma: no cover - surfaced when Playwright is missing
|
||||
_webshot = None # type: ignore
|
||||
ScreenshotError = None # type: ignore[assignment]
|
||||
ScreenshotOptions = None # type: ignore[assignment]
|
||||
ScreenshotResult = None # type: ignore[assignment]
|
||||
capture_screenshot = None # type: ignore[assignment]
|
||||
ScreenshotImportError = exc # type: ignore[assignment]
|
||||
else:
|
||||
ScreenshotError = _webshot.ScreenshotError
|
||||
ScreenshotOptions = _webshot.ScreenshotOptions
|
||||
ScreenshotResult = _webshot.ScreenshotResult
|
||||
capture_screenshot = _webshot.capture_screenshot
|
||||
ScreenshotImportError = None
|
||||
# CBOR utilities
|
||||
decode_cbor = _utils.decode_cbor
|
||||
jsonify = _utils.jsonify
|
||||
# General utilities
|
||||
CHUNK_SIZE = _utils.CHUNK_SIZE
|
||||
ensure_directory = _utils.ensure_directory
|
||||
unique_path = _utils.unique_path
|
||||
download_hydrus_file = _hydrus.download_hydrus_file
|
||||
sanitize_metadata_value = _utils.sanitize_metadata_value
|
||||
unique_preserve_order = _utils.unique_preserve_order
|
||||
sha256_file = _utils.sha256_file
|
||||
create_metadata_sidecar = _utils.create_metadata_sidecar
|
||||
create_tags_sidecar = _utils.create_tags_sidecar
|
||||
# Format utilities
|
||||
format_bytes = _utils.format_bytes
|
||||
format_duration = _utils.format_duration
|
||||
format_timestamp = _utils.format_timestamp
|
||||
format_metadata_value = _utils.format_metadata_value
|
||||
# Link utilities
|
||||
extract_link = _utils.extract_link
|
||||
extract_link_from_args = _utils.extract_link_from_args
|
||||
extract_link_from_result = _utils.extract_link_from_result
|
||||
get_api_key = _utils.get_api_key
|
||||
add_direct_link_to_result = _utils.add_direct_link_to_result
|
||||
# URL policy utilities
|
||||
resolve_url_policy = _utils.resolve_url_policy
|
||||
UrlPolicy = _utils.UrlPolicy
|
||||
# Download utilities
|
||||
DownloadOptions = _download.DownloadOptions
|
||||
DownloadError = _download.DownloadError
|
||||
DownloadMediaResult = _download.DownloadMediaResult
|
||||
download_media = _download.download_media
|
||||
is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp
|
||||
probe_url = _download.probe_url
|
||||
# Hydrus utilities
|
||||
hydrus_request = _hydrus.hydrus_request
|
||||
hydrus_export = _hydrus.hydrus_export
|
||||
HydrusClient = _hydrus.HydrusClient
|
||||
HydrusRequestError = _hydrus.HydrusRequestError
|
||||
connect_ipc = _tasks.connect_ipc
|
||||
ipc_sender = _tasks.ipc_sender
|
||||
__all__ = [
|
||||
'decode_cbor',
|
||||
'jsonify',
|
||||
'CHUNK_SIZE',
|
||||
'ensure_directory',
|
||||
'unique_path',
|
||||
'download_hydrus_file',
|
||||
'sanitize_metadata_value',
|
||||
'unique_preserve_order',
|
||||
'sha256_file',
|
||||
'resolve_url_policy',
|
||||
'UrlPolicy',
|
||||
'ScreenshotError',
|
||||
'ScreenshotOptions',
|
||||
'ScreenshotResult',
|
||||
'capture_screenshot',
|
||||
'ScreenshotImportError',
|
||||
'DownloadOptions',
|
||||
'DownloadError',
|
||||
'DownloadMediaResult',
|
||||
'download_media',
|
||||
'is_url_supported_by_ytdlp',
|
||||
'probe_url',
|
||||
'HydrusClient',
|
||||
'HydrusRequestError',
|
||||
'hydrus_request',
|
||||
'hydrus_export',
|
||||
'connect_ipc',
|
||||
'ipc_sender',
|
||||
]
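
# Example (illustrative; not part of the committed file): consumers are meant
# to import the re-exported names from the package root rather than from the
# private submodules, e.g.
#
#     from helper import download_media, DownloadOptions, format_bytes
#
#     print(format_bytes(1_048_576))  # exact output format is defined in _utils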

helper/adjective.json (new file, 130 lines)
@@ -0,0 +1,130 @@
{
  "Occult": ["esoterica", "ritual", "alchemy", "magic", "hermetic", "divination", "grimoires", "symbolism", "ceremony"],
  "Philosophy": ["ethics", "metaphysics", "epistemology", "logic", "existentialism", "stoicism", "phenomenology", "dialectic", "aesthetics"],
  "Mystery": ["investigation", "crime", "detective", "noir", "thriller", "suspense", "conspiracy", "whodunit", "clues"],
  "Religion": ["scripture", "theology", "worship", "ritual", "doctrine", "faith", "tradition", "liturgy", "sacred"],
  "Mythology": ["gods", "creation", "heroes", "legends", "folklore", "pantheon", "epic", "mythic", "archetype"],
  "Science": ["research", "experiment", "theory", "biology", "physics", "chemistry", "data", "method", "innovation"],
  "Art": ["visual", "painting", "sculpture", "modernism", "technique", "studio", "curation", "expression", "composition"],
  "Literature": ["fiction", "poetry", "novel", "criticism", "narrative", "prose", "drama", "canonical", "translation"],
  "History": ["archaeology", "chronicle", "period", "empire", "revolution", "archive", "heritage", "historiography", "timeline"],
  "Psychology": ["cognition", "behavior", "therapy", "development", "neuroscience", "personality", "perception", "emotion", "motivation"],
  "gnostic": ["religion", "scripture", "gnostic", "gospel", "wisdom", "spirituality", "ancient", "philosophy", "esoteric", "mysticism", "mythology", "theology", "sacred", "divine", "apocrypha", "gnosticism"]
}

helper/alldebrid.py (new file, 829 lines)
@@ -0,0 +1,829 @@
"""AllDebrid API integration for converting free links to direct downloads.
|
||||
|
||||
AllDebrid is a debrid service that unlocks free file hosters and provides direct download links.
|
||||
API docs: https://docs.alldebrid.com/#general-informations
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set, List, Sequence
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from .http_client import HTTPClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
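# Example (illustrative sketch; not part of the committed module): the typical
# link-unlock flow using the client defined below. The API key and hoster URL
# are placeholders.
#
#     from helper.alldebrid import AllDebridClient, AllDebridError
#
#     client = AllDebridClient("MY_API_KEY")
#     try:
#         direct = client.unlock_link("https://1fichier.com/?example")
#     except AllDebridError as exc:
#         print(f"unlock failed: {exc}")
#     else:
#         print(direct or "link was already unrestricted")
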
class AllDebridError(Exception):
|
||||
"""Raised when AllDebrid API request fails."""
|
||||
pass
|
||||
|
||||
|
||||
# Cache for supported hosters (domain -> host info)
|
||||
_SUPPORTED_HOSTERS_CACHE: Optional[Dict[str, Dict[str, Any]]] = None
|
||||
_CACHE_TIMESTAMP: float = 0
|
||||
_CACHE_DURATION: float = 3600 # 1 hour
|
||||
|
||||
|
||||
class AllDebridClient:
|
||||
"""Client for AllDebrid API."""
|
||||
|
||||
# Try both v4 and v3 APIs
|
||||
BASE_URLS = [
|
||||
"https://api.alldebrid.com/v4",
|
||||
"https://api.alldebrid.com/v3",
|
||||
]
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
"""Initialize AllDebrid client with API key.
|
||||
|
||||
Args:
|
||||
api_key: AllDebrid API key from config
|
||||
"""
|
||||
self.api_key = api_key.strip()
|
||||
if not self.api_key:
|
||||
raise AllDebridError("AllDebrid API key is empty")
|
||||
self.base_url = self.BASE_URLS[0] # Start with v4
|
||||
|
||||
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Make a request to AllDebrid API.
|
||||
|
||||
Args:
|
||||
endpoint: API endpoint (e.g., "user/profile", "link/unlock")
|
||||
params: Query parameters
|
||||
|
||||
Returns:
|
||||
Parsed JSON response
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails or API returns error
|
||||
"""
|
||||
if params is None:
|
||||
params = {}
|
||||
|
||||
# Add API key to params
|
||||
params['apikey'] = self.api_key
|
||||
|
||||
url = f"{self.base_url}/{endpoint}"
|
||||
query_string = urlencode(params)
|
||||
full_url = f"{url}?{query_string}"
|
||||
|
||||
logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...")
|
||||
|
||||
try:
|
||||
# Pass timeout to HTTPClient init, not to get()
|
||||
with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client:
|
||||
try:
|
||||
response = client.get(full_url)
|
||||
response.raise_for_status()
|
||||
except Exception as req_err:
|
||||
# Log detailed error info
|
||||
logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True)
|
||||
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
|
||||
try:
|
||||
error_body = req_err.response.content.decode('utf-8') # type: ignore
|
||||
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
|
||||
except Exception:
pass
|
||||
raise
|
||||
|
||||
data = json.loads(response.content.decode('utf-8'))
|
||||
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
|
||||
|
||||
# Check for API errors
|
||||
if data.get('status') == 'error':
|
||||
error_msg = data.get('error', {}).get('message', 'Unknown error')
|
||||
logger.error(f"[AllDebrid] API error: {error_msg}")
|
||||
raise AllDebridError(f"AllDebrid API error: {error_msg}")
|
||||
|
||||
return data
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
error_msg = f"AllDebrid request failed: {exc}"
|
||||
logger.error(f"[AllDebrid] {error_msg}", exc_info=True)
|
||||
raise AllDebridError(error_msg)
|
||||
|
||||
def unlock_link(self, link: str) -> Optional[str]:
|
||||
"""Unlock a restricted link and get direct download URL.
|
||||
|
||||
Args:
|
||||
link: Restricted link to unlock
|
||||
|
||||
Returns:
|
||||
Direct download URL, or None if already unrestricted
|
||||
|
||||
Raises:
|
||||
AllDebridError: If unlock fails
|
||||
"""
|
||||
if not link.startswith(('http://', 'https://')):
|
||||
raise AllDebridError(f"Invalid URL: {link}")
|
||||
|
||||
try:
|
||||
response = self._request('link/unlock', {'link': link})
|
||||
|
||||
# Check if unlock was successful
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
|
||||
# AllDebrid returns the download info in 'link' field
|
||||
if 'link' in data:
|
||||
return data['link']
|
||||
|
||||
# Alternative: check for 'file' field
|
||||
if 'file' in data:
|
||||
return data['file']
|
||||
|
||||
# If no direct link, return the input link
|
||||
return link
|
||||
|
||||
return None
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to unlock link: {exc}")
|
||||
|
||||
def check_host(self, hostname: str) -> Dict[str, Any]:
|
||||
"""Check if a host is supported by AllDebrid.
|
||||
|
||||
Args:
|
||||
hostname: Hostname to check (e.g., "uploadhaven.com")
|
||||
|
||||
Returns:
|
||||
Host information dict with support status
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('host', {'name': hostname})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return response.get('data', {})
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to check host: {exc}")
|
||||
|
||||
def get_user_info(self) -> Dict[str, Any]:
|
||||
"""Get current user account information.
|
||||
|
||||
Returns:
|
||||
User information dict
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('user/profile')
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return response.get('data', {})
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get user info: {exc}")
|
||||
|
||||
def get_supported_hosters(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Get list of all supported hosters from AllDebrid API.
|
||||
|
||||
Returns:
|
||||
Dict mapping domain to host info (status, name, etc)
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('hosts/domains')
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
# The API returns hosts keyed by domain
|
||||
return data if isinstance(data, dict) else {}
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get supported hosters: {exc}")
|
||||
|
||||
def magnet_add(self, magnet_uri: str) -> Dict[str, Any]:
|
||||
"""Submit a magnet link or torrent hash to AllDebrid for processing.
|
||||
|
||||
AllDebrid will download the torrent content and store it in the account.
|
||||
Processing time varies based on torrent size and availability.
|
||||
|
||||
Args:
|
||||
magnet_uri: Magnet URI (magnet:?xt=urn:btih:...) or torrent hash
|
||||
|
||||
Returns:
|
||||
Dict with magnet info:
|
||||
- id: Magnet ID (int) - needed for status checks
|
||||
- name: Torrent name
|
||||
- hash: Torrent hash
|
||||
- size: Total file size (bytes)
|
||||
- ready: Boolean - True if already available
|
||||
|
||||
Raises:
|
||||
AllDebridError: If submit fails (requires premium, invalid magnet, etc)
|
||||
"""
|
||||
if not magnet_uri:
|
||||
raise AllDebridError("Magnet URI is empty")
|
||||
|
||||
try:
|
||||
# API endpoint: POST /v4/magnet/upload
|
||||
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
|
||||
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
if magnets and len(magnets) > 0:
|
||||
magnet_info = magnets[0]
|
||||
|
||||
# Check for errors in the magnet response
|
||||
if 'error' in magnet_info:
|
||||
error = magnet_info['error']
|
||||
error_msg = error.get('message', 'Unknown error')
|
||||
raise AllDebridError(f"Magnet error: {error_msg}")
|
||||
|
||||
return magnet_info
|
||||
|
||||
raise AllDebridError("No magnet data in response")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to submit magnet: {exc}")
|
||||
|
||||
def magnet_status(self, magnet_id: int, include_files: bool = False) -> Dict[str, Any]:
|
||||
"""Get status of a magnet currently being processed or stored.
|
||||
|
||||
Status codes:
|
||||
0-3: Processing (in queue, downloading, compressing, uploading)
|
||||
4: Ready (files available for download)
|
||||
5-15: Error (upload failed, not downloaded in 20min, too big, etc)
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID from magnet_add()
|
||||
include_files: If True, includes file list in response
|
||||
|
||||
Returns:
|
||||
Dict with status info:
|
||||
- id: Magnet ID
|
||||
- filename: Torrent name
|
||||
- size: Total size (bytes)
|
||||
- status: Human-readable status
|
||||
- statusCode: Numeric code (0-15)
|
||||
- downloaded: Bytes downloaded so far
|
||||
- uploaded: Bytes uploaded so far
|
||||
- seeders: Number of seeders
|
||||
- downloadSpeed: Current speed (bytes/sec)
|
||||
- uploadSpeed: Current speed (bytes/sec)
|
||||
- files: (optional) Array of file objects when include_files=True
|
||||
Each file: {n: name, s: size, l: download_link}
|
||||
|
||||
Raises:
|
||||
AllDebridError: If status check fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
# Use v4.1 endpoint for better response format
|
||||
# Temporarily override base_url for this request
|
||||
old_base = self.base_url
|
||||
self.base_url = "https://api.alldebrid.com/v4.1"
|
||||
|
||||
try:
|
||||
response = self._request('magnet/status', {'id': str(magnet_id)})
|
||||
finally:
|
||||
self.base_url = old_base
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', {})
|
||||
|
||||
# Handle both list and dict responses
|
||||
if isinstance(magnets, list) and len(magnets) > 0:
|
||||
return magnets[0]
|
||||
elif isinstance(magnets, dict) and magnets:
|
||||
return magnets
|
||||
|
||||
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet status: {exc}")
|
||||
|
||||
def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
|
||||
"""Get live status of a magnet using delta sync mode.
|
||||
|
||||
The live mode endpoint provides real-time progress by only sending
|
||||
deltas (changed fields) instead of full status on each call. This
|
||||
reduces bandwidth and server load compared to regular polling.
|
||||
|
||||
Note: The "live" designation refers to the delta-sync mode where you
|
||||
maintain state locally and apply diffs from the API, not a streaming
|
||||
endpoint. Regular magnet_status() polling is simpler for single magnets.
|
||||
|
||||
Docs: https://docs.alldebrid.com/#get-status-live-mode
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID from magnet_add()
|
||||
session: Session ID (use same ID across multiple calls). If None, will query current status
|
||||
counter: Counter value from previous response (starts at 0)
|
||||
|
||||
Returns:
|
||||
Dict with magnet status. May contain only changed fields if counter > 0.
|
||||
For single-magnet tracking, use magnet_status() instead.
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
# For single magnet queries, just use regular endpoint with ID
|
||||
# The "live mode" with session/counter is for multi-magnet dashboards
|
||||
# where bandwidth savings from diffs matter
|
||||
response = self._request('magnet/status', {'id': str(magnet_id)})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
# Handle list response
|
||||
if isinstance(magnets, list) and len(magnets) > 0:
|
||||
return magnets[0]
|
||||
|
||||
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet live status: {exc}")
|
||||
|
||||
def magnet_links(self, magnet_ids: list) -> Dict[str, Any]:
|
||||
"""Get files and download links for one or more magnets.
|
||||
|
||||
Use this after magnet_status shows statusCode == 4 (Ready).
|
||||
Returns the file tree structure with direct download links.
|
||||
|
||||
Args:
|
||||
magnet_ids: List of magnet IDs to get files for
|
||||
|
||||
Returns:
|
||||
Dict mapping magnet_id (as string) -> magnet_info:
|
||||
- id: Magnet ID
|
||||
- files: Array of file/folder objects
|
||||
File: {n: name, s: size, l: direct_download_link}
|
||||
Folder: {n: name, e: [sub_items]}
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
if not magnet_ids:
|
||||
raise AllDebridError("No magnet IDs provided")
|
||||
|
||||
try:
|
||||
# Build parameter: id[]=123&id[]=456 style
|
||||
params = {}
|
||||
for i, magnet_id in enumerate(magnet_ids):
|
||||
params[f'id[{i}]'] = str(magnet_id)
|
||||
|
||||
response = self._request('magnet/files', params)
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
# Convert list to dict keyed by ID (as string) for easier access
|
||||
result = {}
|
||||
for magnet_info in magnets:
|
||||
magnet_id = magnet_info.get('id')
|
||||
if magnet_id:
|
||||
result[str(magnet_id)] = magnet_info
|
||||
|
||||
return result
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet files: {exc}")
|
||||
|
||||
def instant_available(self, magnet_hash: str) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Check if magnet is available for instant streaming without downloading.
|
||||
|
||||
AllDebrid's "instant" feature checks if a magnet can be streamed directly
|
||||
without downloading all the data. Returns available video/audio files.
|
||||
|
||||
Args:
|
||||
magnet_hash: Torrent hash (with or without magnet: prefix)
|
||||
|
||||
Returns:
|
||||
List of available files for streaming, or None if not available
|
||||
Each file: {n: name, s: size, e: extension, t: type}
|
||||
Returns empty list if torrent not found or not available
|
||||
|
||||
Raises:
|
||||
AllDebridError: If API request fails
|
||||
"""
|
||||
try:
|
||||
# Parse magnet hash if needed
|
||||
if magnet_hash.startswith('magnet:'):
|
||||
# Extract hash from magnet URI
|
||||
import re
|
||||
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
|
||||
if not match:
|
||||
return None
|
||||
hash_value = match.group(1)
|
||||
else:
|
||||
hash_value = magnet_hash.strip()
|
||||
|
||||
if not hash_value or len(hash_value) < 32:
|
||||
return None
|
||||
|
||||
response = self._request('magnet/instant', {'magnet': hash_value})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
# Returns 'files' array if available, or empty
|
||||
return data.get('files', [])
|
||||
|
||||
# Not available is not an error, just return empty list
|
||||
return []
|
||||
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug(f"[AllDebrid] instant_available check failed: {exc}")
|
||||
return None
|
||||
|
||||
def magnet_delete(self, magnet_id: int) -> bool:
|
||||
"""Delete a magnet from the AllDebrid account.
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID to delete
|
||||
|
||||
Returns:
|
||||
True if deletion was successful
|
||||
|
||||
Raises:
|
||||
AllDebridError: If deletion fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
response = self._request('magnet/delete', {'id': str(magnet_id)})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return True
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to delete magnet: {exc}")
|
||||
|
||||
|
||||
def _get_cached_supported_hosters(api_key: str) -> Set[str]:
|
||||
"""Get cached list of supported hoster domains.
|
||||
|
||||
Uses AllDebrid API to fetch the list once per hour,
|
||||
caching the result to avoid repeated API calls.
|
||||
|
||||
Args:
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
Set of supported domain names (lowercased)
|
||||
"""
|
||||
global _SUPPORTED_HOSTERS_CACHE, _CACHE_TIMESTAMP
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Return cached result if still valid
|
||||
if _SUPPORTED_HOSTERS_CACHE is not None and (now - _CACHE_TIMESTAMP) < _CACHE_DURATION:
|
||||
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||
|
||||
# Fetch fresh list from API
|
||||
try:
|
||||
client = AllDebridClient(api_key)
|
||||
hosters_dict = client.get_supported_hosters()
|
||||
|
||||
if hosters_dict:
|
||||
# API returns: hosts (list), streams (list), redirectors (list)
|
||||
# Combine all into a single set
|
||||
all_domains: Set[str] = set()
|
||||
|
||||
# Add hosts
|
||||
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
|
||||
all_domains.update(hosters_dict['hosts'])
|
||||
|
||||
# Add streams
|
||||
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
|
||||
all_domains.update(hosters_dict['streams'])
|
||||
|
||||
# Add redirectors
|
||||
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
|
||||
all_domains.update(hosters_dict['redirectors'])
|
||||
|
||||
# Cache as dict for consistency
|
||||
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
|
||||
_CACHE_TIMESTAMP = now
|
||||
|
||||
if all_domains:
|
||||
debug(f"✓ Cached {len(all_domains)} supported hosters")
|
||||
|
||||
return all_domains
|
||||
except Exception as exc:
|
||||
log(f"⚠ Failed to fetch supported hosters: {exc}", file=sys.stderr)
|
||||
# Return any cached hosters even if expired
|
||||
if _SUPPORTED_HOSTERS_CACHE:
|
||||
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||
|
||||
# Fallback: empty set if no cache available
|
||||
return set()
|
||||
|
||||
|
||||
def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
|
||||
"""Check if a URL is from a hoster that AllDebrid can unlock.
|
||||
|
||||
Intelligently queries the AllDebrid API to detect if the URL is
|
||||
from a supported restricted hoster.
|
||||
|
||||
Args:
|
||||
url: URL to check
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
True if URL is from a supported restrictable hoster
|
||||
"""
|
||||
if not url or not api_key:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Extract domain from URL
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
|
||||
# Remove www. prefix for comparison
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
|
||||
# Get supported hosters (cached)
|
||||
supported = _get_cached_supported_hosters(api_key)
|
||||
|
||||
if not supported:
|
||||
# API check failed, fall back to manual detection
|
||||
# Check for common restricted hosters
|
||||
common_hosters = {
|
||||
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
|
||||
'datafile.com', 'rapidfile.io', 'nitroflare.com',
|
||||
'1fichier.com', 'mega.nz', 'mediafire.com'
|
||||
}
|
||||
return any(host in url.lower() for host in common_hosters)
|
||||
|
||||
# Check if domain is in supported list
|
||||
# Need to check exact match and with/without www
|
||||
return domain in supported or f"www.{domain}" in supported
|
||||
except Exception as exc:
|
||||
log(f"⚠ Hoster detection failed: {exc}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
|
||||
def convert_link_with_debrid(link: str, api_key: str) -> Optional[str]:
|
||||
"""Convert a restricted link to a direct download URL using AllDebrid.
|
||||
|
||||
Args:
|
||||
link: Restricted link
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
Direct download URL, or original link if already unrestricted
|
||||
"""
|
||||
if not api_key:
|
||||
return None
|
||||
|
||||
try:
|
||||
client = AllDebridClient(api_key)
|
||||
direct_link = client.unlock_link(link)
|
||||
|
||||
if direct_link and direct_link != link:
|
||||
debug(f"✓ Converted link: {link[:60]}... → {direct_link[:60]}...")
|
||||
return direct_link
|
||||
|
||||
return None
|
||||
except AllDebridError as exc:
|
||||
log(f"⚠ Failed to convert link: {exc}", file=sys.stderr)
|
||||
return None
|
||||
except Exception as exc:
|
||||
log(f"⚠ Unexpected error: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def is_magnet_link(uri: str) -> bool:
|
||||
"""Check if a URI is a magnet link.
|
||||
|
||||
Magnet links start with 'magnet:?xt=urn:btih:' or just 'magnet:'
|
||||
|
||||
Args:
|
||||
uri: URI to check
|
||||
|
||||
Returns:
|
||||
True if URI is a magnet link
|
||||
"""
|
||||
if not uri:
|
||||
return False
|
||||
return uri.lower().startswith('magnet:')
|
||||
|
||||
|
||||
def is_torrent_hash(text: str) -> bool:
|
||||
"""Check if text looks like a torrent hash (40 or 64 hex characters).
|
||||
|
||||
Common formats:
|
||||
- Info hash v1: 40 hex chars (SHA-1)
|
||||
- Info hash v2: 64 hex chars (SHA-256)
|
||||
|
||||
Args:
|
||||
text: Text to check
|
||||
|
||||
Returns:
|
||||
True if text matches torrent hash format
|
||||
"""
|
||||
if not text or not isinstance(text, str):
|
||||
return False
|
||||
|
||||
text = text.strip()
|
||||
|
||||
# Check if it's 40 hex chars (SHA-1) or 64 hex chars (SHA-256)
|
||||
if len(text) not in (40, 64):
|
||||
return False
|
||||
|
||||
try:
|
||||
# Try to parse as hex
|
||||
int(text, 16)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def is_torrent_file(path: str) -> bool:
|
||||
"""Check if a file path is a .torrent file.
|
||||
|
||||
Args:
|
||||
path: File path to check
|
||||
|
||||
Returns:
|
||||
True if file has .torrent extension
|
||||
"""
|
||||
if not path:
|
||||
return False
|
||||
return path.lower().endswith('.torrent')
|
||||
|
||||
|
||||
def parse_magnet_or_hash(uri: str) -> Optional[str]:
|
||||
"""Parse a magnet URI or hash into a format for AllDebrid API.
|
||||
|
||||
AllDebrid's magnet/upload endpoint accepts:
|
||||
- Full magnet URIs: magnet:?xt=urn:btih:...
|
||||
- Info hashes: 40 or 64 hex characters
|
||||
|
||||
Args:
|
||||
uri: Magnet URI or hash
|
||||
|
||||
Returns:
|
||||
Normalized input for AllDebrid API, or None if invalid
|
||||
"""
|
||||
if not uri:
|
||||
return None
|
||||
|
||||
uri = uri.strip()
|
||||
|
||||
# Already a magnet link - just return it
|
||||
if is_magnet_link(uri):
|
||||
return uri
|
||||
|
||||
# Check if it's a valid hash
|
||||
if is_torrent_hash(uri):
|
||||
return uri
|
||||
|
||||
# Not a recognized format
|
||||
return None
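
# Example (illustrative; not part of the committed module): how the helpers
# above classify typical inputs.
#
#     is_magnet_link("magnet:?xt=urn:btih:abcdef...")   # True
#     is_torrent_hash("a" * 40)                         # True (v1 info-hash length, hex)
#     is_torrent_file("/downloads/linux.iso.torrent")   # True
#     parse_magnet_or_hash("not-a-magnet")              # None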
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cmdlet: unlock_link
|
||||
# ============================================================================
|
||||
|
||||
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Unlock a restricted link using AllDebrid.
|
||||
|
||||
Converts free hosters and restricted links to direct download URLs.
|
||||
|
||||
Usage:
|
||||
unlock-link <link>
|
||||
unlock-link # Uses URL from pipeline result
|
||||
|
||||
Requires:
|
||||
- AllDebrid API key in config under Debrid.All-debrid
|
||||
|
||||
Args:
|
||||
result: Pipeline result object
|
||||
args: Command arguments
|
||||
config: Configuration dictionary
|
||||
|
||||
Returns:
|
||||
0 on success, 1 on failure
|
||||
"""
|
||||
try:
|
||||
from .link_utils import (
|
||||
extract_link,
|
||||
get_api_key,
|
||||
add_direct_link_to_result,
|
||||
)
|
||||
except ImportError as e:
|
||||
log(f"Required modules unavailable: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get link from args or result
|
||||
link = extract_link(result, args)
|
||||
|
||||
if not link:
|
||||
log("No valid URL provided", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get AllDebrid API key from config
|
||||
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
|
||||
|
||||
if not api_key:
|
||||
log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Try to unlock the link
|
||||
debug(f"Unlocking: {link}")
|
||||
direct_link = convert_link_with_debrid(link, api_key)
|
||||
|
||||
if direct_link:
|
||||
debug(f"✓ Direct link: {direct_link}")
|
||||
|
||||
# Update result with direct link
|
||||
add_direct_link_to_result(result, direct_link, link)
|
||||
|
||||
# Return the updated result via pipeline context
|
||||
# Note: The cmdlet wrapper will handle emitting to pipeline
|
||||
return 0
|
||||
else:
|
||||
log(f"❌ Failed to unlock link or already unrestricted", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cmdlet Registration
|
||||
# ============================================================================
|
||||
|
||||
def _register_unlock_link():
|
||||
"""Register unlock-link command with cmdlet registry if available."""
|
||||
try:
|
||||
from cmdlets import register
|
||||
|
||||
@register(["unlock-link"])
|
||||
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Wrapper to make unlock_link_cmdlet available as cmdlet."""
|
||||
import pipeline as ctx
|
||||
|
||||
ret_code = unlock_link_cmdlet(result, args, config)
|
||||
|
||||
# If successful, emit the result
|
||||
if ret_code == 0:
|
||||
ctx.emit(result)
|
||||
|
||||
return ret_code
|
||||
|
||||
return unlock_link_wrapper
|
||||
except ImportError:
|
||||
# If cmdlets module not available, just return None
|
||||
return None
|
||||
|
||||
|
||||
# Register when module is imported
|
||||
_unlock_link_registration = _register_unlock_link()

helper/archive_client.py (new file, 567 lines)
@@ -0,0 +1,567 @@
"""Archive.org API client for borrowing and downloading books.
|
||||
|
||||
This module provides low-level functions for interacting with Archive.org:
|
||||
- Authentication (login, credential management)
|
||||
- Borrowing (loan, return_loan)
|
||||
- Book metadata extraction (get_book_infos, get_book_metadata)
|
||||
- Image downloading and deobfuscation
|
||||
- PDF creation with metadata
|
||||
|
||||
Used by unified_book_downloader.py for the borrowing workflow.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from concurrent import futures
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES # type: ignore
|
||||
from Crypto.Util import Counter # type: ignore
|
||||
except ImportError:
|
||||
AES = None # type: ignore
|
||||
Counter = None # type: ignore
|
||||
|
||||
try:
|
||||
from tqdm import tqdm # type: ignore
|
||||
except ImportError:
|
||||
tqdm = None # type: ignore
|
||||
|
||||
|
||||
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""Get OpenLibrary/Archive.org email and password from config.
|
||||
|
||||
Supports both formats:
|
||||
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
||||
- Old: {"Archive": {"email": "...", "password": "..."}}
|
||||
{"archive_org_email": "...", "archive_org_password": "..."}
|
||||
|
||||
Returns: (email, password) tuple, each can be None
|
||||
"""
|
||||
if not isinstance(config, dict):
|
||||
return None, None
|
||||
|
||||
# Try new format first
|
||||
provider_config = config.get("provider", {})
|
||||
if isinstance(provider_config, dict):
|
||||
openlibrary_config = provider_config.get("openlibrary", {})
|
||||
if isinstance(openlibrary_config, dict):
|
||||
email = openlibrary_config.get("email")
|
||||
password = openlibrary_config.get("password")
|
||||
if email or password:
|
||||
return email, password
|
||||
|
||||
# Try old nested format
|
||||
archive_config = config.get("Archive")
|
||||
if isinstance(archive_config, dict):
|
||||
email = archive_config.get("email")
|
||||
password = archive_config.get("password")
|
||||
if email or password:
|
||||
return email, password
|
||||
|
||||
# Fall back to old flat format
|
||||
email = config.get("archive_org_email")
|
||||
password = config.get("archive_org_password")
|
||||
return email, password
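
# Example (illustrative; not part of the committed module): the two config
# shapes this helper accepts, per the docstring above. Values are placeholders.
#
#     new_style = {"provider": {"openlibrary": {"email": "user@example.com", "password": "secret"}}}
#     old_style = {"archive_org_email": "user@example.com", "archive_org_password": "secret"}
#     assert credential_openlibrary(new_style) == ("user@example.com", "secret")
#     assert credential_openlibrary(old_style) == ("user@example.com", "secret")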
|
||||
|
||||
|
||||
def display_error(response: requests.Response, message: str) -> None:
|
||||
"""Display error and exit."""
|
||||
log(message, file=sys.stderr)
|
||||
log(response.text, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def login(email: str, password: str) -> requests.Session:
|
||||
"""Login to archive.org.
|
||||
|
||||
Args:
|
||||
email: Archive.org email
|
||||
password: Archive.org password
|
||||
|
||||
Returns:
|
||||
Authenticated requests.Session
|
||||
|
||||
Raises:
|
||||
SystemExit on login failure
|
||||
"""
|
||||
session = requests.Session()
|
||||
session.get("https://archive.org/account/login", timeout=30)
|
||||
|
||||
data = {"username": email, "password": password}
|
||||
response = session.post("https://archive.org/account/login", data=data, timeout=30)
|
||||
|
||||
if "bad_login" in response.text:
|
||||
log("Invalid credentials!", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if "Successful login" in response.text:
|
||||
debug("Successful login")
|
||||
return session
|
||||
display_error(response, "[-] Error while login:")
|
||||
sys.exit(1) # Unreachable but satisfies type checker
|
||||
|
||||
|
||||
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
|
||||
"""Borrow a book from archive.org (14-day loan).
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session from login()
|
||||
book_id: Archive.org book identifier (e.g., 'ia_book_id')
|
||||
verbose: Whether to log messages
|
||||
|
||||
Returns:
|
||||
Session with active loan
|
||||
|
||||
Raises:
|
||||
SystemExit on loan failure
|
||||
"""
|
||||
data = {"action": "grant_access", "identifier": book_id}
|
||||
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
|
||||
data["action"] = "browse_book"
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
|
||||
if response.status_code == 400:
|
||||
try:
|
||||
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
|
||||
debug("This book doesn't need to be borrowed")
|
||||
return session
|
||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||
except Exception:
|
||||
display_error(response, "The book cannot be borrowed")
|
||||
|
||||
data["action"] = "create_token"
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
|
||||
if "token" in response.text:
|
||||
if verbose:
|
||||
debug("Successful loan")
|
||||
return session
|
||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||
sys.exit(1) # Unreachable but satisfies type checker
|
||||
|
||||
|
||||
def return_loan(session: requests.Session, book_id: str) -> None:
|
||||
"""Return a borrowed book.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session with active loan
|
||||
book_id: Archive.org book identifier
|
||||
"""
|
||||
data = {"action": "return_loan", "identifier": book_id}
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
if response.status_code == 200 and response.json()["success"]:
|
||||
debug("Book returned")
|
||||
else:
|
||||
display_error(response, "Something went wrong when trying to return the book")
|
||||
|
||||
|
||||
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
||||
"""Extract book information and page links from archive.org viewer.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
|
||||
|
||||
Returns:
|
||||
Tuple of (title, page_links, metadata)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If page data cannot be extracted
|
||||
"""
|
||||
r = session.get(url, timeout=30).text
|
||||
|
||||
# Try to extract the infos URL from the response
|
||||
try:
|
||||
# Look for the "url" field in the response
|
||||
if '"url":"' not in r:
|
||||
raise ValueError("No 'url' field found in response")
|
||||
infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
|
||||
except (IndexError, ValueError) as e:
|
||||
# If URL extraction fails, raise with better error message
|
||||
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
|
||||
|
||||
response = session.get(infos_url, timeout=30)
|
||||
data = response.json()["data"]
|
||||
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
|
||||
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
|
||||
title = title[:150] # Trim to avoid long file names
|
||||
metadata = data["metadata"]
|
||||
links = []
|
||||
|
||||
# Safely extract page links from brOptions data
|
||||
try:
|
||||
br_data = data.get("brOptions", {}).get("data", [])
|
||||
for item in br_data:
|
||||
if isinstance(item, list):
|
||||
for page in item:
|
||||
if isinstance(page, dict) and "uri" in page:
|
||||
links.append(page["uri"])
|
||||
elif isinstance(item, dict) and "uri" in item:
|
||||
links.append(item["uri"])
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
|
||||
# Continue with whatever links we found
|
||||
|
||||
if len(links) > 1:
|
||||
debug(f"Found {len(links)} pages")
|
||||
return title, links, metadata
|
||||
elif len(links) == 1:
|
||||
debug(f"Found {len(links)} page")
|
||||
return title, links, metadata
|
||||
else:
|
||||
log("Error while getting image links - no pages found", file=sys.stderr)
|
||||
raise RuntimeError("No pages found in book data")
|
||||
|
||||
|
||||
def image_name(pages: int, page: int, directory: str) -> str:
|
||||
"""Generate image filename for page.
|
||||
|
||||
Args:
|
||||
pages: Total number of pages
|
||||
page: Current page number (0-indexed)
|
||||
directory: Directory to save to
|
||||
|
||||
Returns:
|
||||
Full path to image file
|
||||
"""
|
||||
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
||||
|
||||
|
||||
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
|
||||
"""Decrypt obfuscated image data using AES-CTR.
|
||||
|
||||
This handles Archive.org's image obfuscation for borrowed books.
|
||||
Based on: https://github.com/justimm
|
||||
|
||||
Args:
|
||||
image_data: Encrypted image bytes
|
||||
link: Image URL (used to derive AES key)
|
||||
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
|
||||
|
||||
Returns:
|
||||
Decrypted image bytes
|
||||
"""
|
||||
if not AES or not Counter:
|
||||
raise RuntimeError("Crypto library not available")
|
||||
|
||||
try:
|
||||
version, counter_b64 = obf_header.split("|")
|
||||
except Exception as e:
|
||||
raise ValueError("Invalid X-Obfuscate header format") from e
|
||||
|
||||
if version != "1":
|
||||
raise ValueError("Unsupported obfuscation version: " + version)
|
||||
|
||||
# Derive AES key from URL
|
||||
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
||||
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
|
||||
key = sha1_digest[:16]
|
||||
|
||||
# Decode counter
|
||||
counter_bytes = base64.b64decode(counter_b64)
|
||||
if len(counter_bytes) != 16:
|
||||
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
|
||||
|
||||
prefix = counter_bytes[:8]
|
||||
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
||||
|
||||
# Create AES-CTR cipher
|
||||
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
|
||||
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
||||
|
||||
decrypted_part = cipher.decrypt(image_data[:1024])
|
||||
new_data = decrypted_part + image_data[1024:]
|
||||
return new_data
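
# Example (illustrative sketch; not part of the committed module): how this
# helper is called for a page response carrying the X-Obfuscate header, as
# download_one_image() below does. Requires pycryptodome (AES/Counter).
#
#     resp = session.get(page_url, timeout=30)
#     header = resp.headers.get("X-Obfuscate")
#     data = deobfuscate_image(resp.content, page_url, header) if header else resp.content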
|
||||
|
||||
|
||||
def download_one_image(
|
||||
session: requests.Session,
|
||||
link: str,
|
||||
i: int,
|
||||
directory: str,
|
||||
book_id: str,
|
||||
pages: int,
|
||||
) -> None:
|
||||
"""Download a single book page image.
|
||||
|
||||
Handles obfuscated images and re-borrowing on 403 errors.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
link: Direct image URL
|
||||
i: Page index (0-based)
|
||||
directory: Directory to save to
|
||||
book_id: Archive.org book ID (for re-borrowing on 403)
|
||||
pages: Total number of pages
|
||||
"""
|
||||
headers = {
|
||||
"Referer": "https://archive.org/",
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Sec-Fetch-Site": "same-site",
|
||||
"Sec-Fetch-Mode": "no-cors",
|
||||
"Sec-Fetch-Dest": "image",
|
||||
}
|
||||
retry = True
|
||||
response = None
|
||||
while retry:
|
||||
try:
|
||||
response = session.get(link, headers=headers, timeout=30)
|
||||
if response.status_code == 403:
|
||||
session = loan(session, book_id, verbose=False)
|
||||
raise Exception("Borrow again")
|
||||
if response.status_code == 200:
|
||||
retry = False
|
||||
except Exception:
|
||||
time.sleep(1)
|
||||
|
||||
image = image_name(pages, i, directory)
|
||||
|
||||
if response is None:
|
||||
log(f"Failed to download page {i}", file=sys.stderr)
|
||||
return
|
||||
|
||||
obf_header = response.headers.get("X-Obfuscate")
|
||||
image_content = None
|
||||
if obf_header:
|
||||
try:
|
||||
image_content = deobfuscate_image(response.content, link, obf_header)
|
||||
except Exception as e:
|
||||
log(f"Deobfuscation failed: {e}", file=sys.stderr)
|
||||
return
|
||||
else:
|
||||
image_content = response.content
|
||||
|
||||
with open(image, "wb") as f:
|
||||
f.write(image_content)
|
||||
|
||||
|
||||
def download(
|
||||
session: requests.Session,
|
||||
n_threads: int,
|
||||
directory: str,
|
||||
links: List[str],
|
||||
scale: int,
|
||||
book_id: str,
|
||||
) -> List[str]:
|
||||
"""Download all book pages as images.
|
||||
|
||||
Uses thread pool for parallel downloads.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
n_threads: Number of download threads
|
||||
directory: Directory to save images to
|
||||
links: List of image URLs
|
||||
scale: Image resolution (0=highest, 10=lowest)
|
||||
book_id: Archive.org book ID (for re-borrowing)
|
||||
|
||||
Returns:
|
||||
List of downloaded image file paths
|
||||
"""
|
||||
debug("Downloading pages...")
|
||||
links = [f"{link}&rotate=0&scale={scale}" for link in links]
|
||||
pages = len(links)
|
||||
|
||||
tasks = []
|
||||
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
||||
for i, link in enumerate(links):
|
||||
tasks.append(
|
||||
executor.submit(
|
||||
download_one_image,
|
||||
session=session,
|
||||
link=link,
|
||||
i=i,
|
||||
directory=directory,
|
||||
book_id=book_id,
|
||||
pages=pages,
|
||||
)
|
||||
)
|
||||
if tqdm:
|
||||
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
||||
pass
|
||||
else:
|
||||
for _ in futures.as_completed(tasks):
|
||||
pass
|
||||
|
||||
images = [image_name(pages, i, directory) for i in range(len(links))]
|
||||
return images
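
# Example (illustrative sketch; not part of the committed module): the
# end-to-end borrow-and-download flow these functions implement. Credentials,
# book ID, and target directory are placeholders; the directory must already
# exist, and scale=3 is an arbitrary mid-range resolution.
#
#     session = login("user@example.com", "secret")
#     book_id = "someIdentifier"
#     session = loan(session, book_id)
#     title, links, metadata = get_book_infos(session, f"https://archive.org/details/{book_id}")
#     images = download(session, n_threads=8, directory="/tmp/" + title,
#                       links=links, scale=3, book_id=book_id)
#     return_loan(session, book_id)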
|
||||
|
||||
|
||||
def check_direct_download(book_id: str) -> Tuple[bool, str]:
|
||||
"""Check if a book can be downloaded directly without borrowing.
|
||||
|
||||
Searches Archive.org metadata for downloadable PDF files.
|
||||
|
||||
Args:
|
||||
book_id: Archive.org book identifier
|
||||
|
||||
Returns:
|
||||
Tuple of (can_download: bool, pdf_url: str)
|
||||
"""
|
||||
try:
|
||||
# First, try to get the metadata to find the actual PDF filename
|
||||
metadata_url = f"https://archive.org/metadata/{book_id}"
|
||||
response = requests.get(metadata_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
metadata = response.json()
|
||||
|
||||
# Find PDF file in files list
|
||||
if "files" in metadata:
|
||||
for file_info in metadata["files"]:
|
||||
filename = file_info.get("name", "")
|
||||
if filename.endswith(".pdf") and file_info.get("source") == "original":
|
||||
# Found the original PDF
|
||||
pdf_filename = filename
|
||||
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
|
||||
|
||||
# Verify it's accessible
|
||||
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
|
||||
if check_response.status_code == 200:
|
||||
return True, pdf_url
|
||||
|
||||
return False, ""
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error checking direct download: {e}", file=sys.stderr)
|
||||
return False, ""
|
||||
|
||||
|
||||
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
|
||||
"""Fetch book data from OpenLibrary using ISBN.
|
||||
|
||||
Args:
|
||||
isbn: ISBN-10 or ISBN-13 to search for
|
||||
|
||||
Returns:
|
||||
Dictionary with book metadata from OpenLibrary
|
||||
"""
|
||||
try:
|
||||
# Try ISBN API first
|
||||
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
|
||||
response = requests.get(api_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if data:
|
||||
# Get first result
|
||||
key = list(data.keys())[0]
|
||||
return data[key]
|
||||
return {}
|
||||
except Exception as e:
|
||||
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
|
||||
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
|
||||
"""Extract ISBN from archive.org metadata.
|
||||
|
||||
Looks for ISBN in various metadata fields.
|
||||
|
||||
Args:
|
||||
metadata: Archive.org metadata dictionary
|
||||
|
||||
Returns:
|
||||
ISBN string (clean, no hyphens) or empty string if not found
|
||||
"""
|
||||
# Try various common metadata fields
|
||||
isbn_fields = [
|
||||
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
|
||||
"isbn-10", "isbn-13", "identifer_isbn"
|
||||
]
|
||||
|
||||
for field in isbn_fields:
|
||||
if field in metadata:
|
||||
isbn_val = metadata[field]
|
||||
if isinstance(isbn_val, list):
|
||||
isbn_val = isbn_val[0] if isbn_val else None
|
||||
if isbn_val and isinstance(isbn_val, str):
|
||||
# Clean ISBN (remove hyphens, spaces)
|
||||
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
|
||||
if len(isbn_clean) in [10, 13]:
|
||||
return isbn_clean
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def normalize_url(url: str) -> str:
|
||||
"""Convert openlibrary.org URL to archive.org URL.
|
||||
|
||||
Looks up the actual Archive.org ID from OpenLibrary API.
|
||||
|
||||
Args:
|
||||
url: Book URL (archive.org or openlibrary.org format)
|
||||
|
||||
Returns:
|
||||
Normalized archive.org URL
|
||||
"""
|
||||
url = url.strip()
|
||||
|
||||
# Already archive.org format
|
||||
if url.startswith("https://archive.org/details/"):
|
||||
return url
|
||||
|
||||
# Convert openlibrary.org format by querying the OpenLibrary API
|
||||
if "openlibrary.org/books/" in url:
|
||||
try:
|
||||
# Extract the book ID (e.g., OL6796852M)
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
|
||||
# Query OpenLibrary API to get the book metadata
|
||||
api_url = f"https://openlibrary.org/books/{book_id}.json"
|
||||
response = requests.get(api_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# Look for identifiers including internet_archive or ocaid
|
||||
# First try ocaid (Open Content Alliance ID) - this is most common
|
||||
if "ocaid" in data:
|
||||
ocaid = data["ocaid"]
|
||||
return f"https://archive.org/details/{ocaid}"
|
||||
|
||||
# Check for identifiers object
|
||||
if "identifiers" in data:
|
||||
identifiers = data["identifiers"]
|
||||
|
||||
# Look for internet_archive ID
|
||||
if "internet_archive" in identifiers:
|
||||
ia_ids = identifiers["internet_archive"]
|
||||
if isinstance(ia_ids, list) and ia_ids:
|
||||
ia_id = ia_ids[0]
|
||||
else:
|
||||
ia_id = ia_ids
|
||||
return f"https://archive.org/details/{ia_id}"
|
||||
|
||||
# If no IA identifier found, use the book ID as fallback
|
||||
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
|
||||
except requests.RequestException as e:
|
||||
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
|
||||
# Fallback to using the book ID directly
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
except (KeyError, IndexError) as e:
|
||||
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
|
||||
# Fallback to using the book ID directly
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
|
||||
# Return original if can't parse
|
||||
return url
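
# Example (illustrative; not part of the committed module): an openlibrary.org
# edition URL is resolved to its archive.org details page. The OL id below is
# a placeholder and the exact result depends on the live OpenLibrary API.
#
#     normalize_url("https://openlibrary.org/books/OL1234567M/Some_Title")
#     # -> "https://archive.org/details/<ocaid-or-OL1234567M>"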

helper/download.py (new file, 730 lines)
@@ -0,0 +1,730 @@
"""Download media files using yt-dlp with support for direct file downloads.
|
||||
|
||||
Lean, focused downloader without event infrastructure overhead.
|
||||
- yt-dlp integration for streaming sites
|
||||
- Direct file download fallback for PDFs, images, documents
|
||||
- Tag extraction via metadata.extract_ytdlp_tags()
|
||||
- Logging via helper.logger.log()
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re # noqa: F401
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import httpx
|
||||
|
||||
from helper.logger import log, debug
|
||||
from .utils import ensure_directory, sha256_file
|
||||
from .http_client import HTTPClient
|
||||
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
|
||||
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
from yt_dlp.extractor import gen_extractors # type: ignore
|
||||
except Exception as exc:
|
||||
yt_dlp = None # type: ignore
|
||||
YTDLP_IMPORT_ERROR = exc
|
||||
else:
|
||||
YTDLP_IMPORT_ERROR = None
|
||||
|
||||
try:
|
||||
from metadata import extract_ytdlp_tags
|
||||
except ImportError:
|
||||
extract_ytdlp_tags = None
|
||||
|
||||
_EXTRACTOR_CACHE: List[Any] | None = None
|
||||
|
||||
|
||||
def _ensure_yt_dlp_ready() -> None:
|
||||
"""Verify yt-dlp is available, raise if not."""
|
||||
if yt_dlp is not None:
|
||||
return
|
||||
detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
|
||||
raise DownloadError(f"yt-dlp module not available: {detail}")
|
||||
|
||||
|
||||
def _progress_callback(status: Dict[str, Any]) -> None:
|
||||
"""Simple progress callback using logger."""
|
||||
event = status.get("status")
|
||||
if event == "downloading":
|
||||
percent = status.get("_percent_str", "?")
|
||||
speed = status.get("_speed_str", "?")
|
||||
debug(f"Downloading {percent} at {speed}")
|
||||
elif event == "finished":
|
||||
debug(f"✓ Download finished: {status.get('filename')}")
|
||||
elif event in ("postprocessing", "processing"):
|
||||
debug(f"Post-processing: {status.get('postprocessor')}")
|
||||
|
||||
|
||||
def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
"""Check if URL is supported by yt-dlp."""
|
||||
if yt_dlp is None:
|
||||
return False
|
||||
global _EXTRACTOR_CACHE
|
||||
if _EXTRACTOR_CACHE is None:
|
||||
try:
|
||||
_EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type]
|
||||
except Exception:
|
||||
_EXTRACTOR_CACHE = []
|
||||
for extractor in _EXTRACTOR_CACHE:
|
||||
try:
|
||||
if not extractor.suitable(url):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
name = getattr(extractor, "IE_NAME", "")
|
||||
if name.lower() == "generic":
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Get list of available formats for a URL using yt-dlp.
|
||||
|
||||
Args:
|
||||
url: URL to get formats for
|
||||
no_playlist: If True, ignore playlists and list formats for single video
|
||||
playlist_items: If specified, only list formats for these playlist items (e.g., "1,3,5-8")
|
||||
|
||||
Returns:
|
||||
List of format dictionaries with keys: format_id, format, resolution, fps, vcodec, acodec, filesize, etc.
|
||||
Returns None if yt-dlp is not available or format listing fails.
|
||||
"""
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
try:
|
||||
ydl_opts = {
|
||||
"quiet": False,
|
||||
"no_warnings": False,
|
||||
"socket_timeout": 30,
|
||||
}
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
# Add playlist_items filter if specified
|
||||
if playlist_items:
|
||||
ydl_opts["playlist_items"] = playlist_items
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
debug(f"Fetching format list for: {url}")
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
formats = info.get("formats", [])
|
||||
if not formats:
|
||||
log("No formats available", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# Parse and extract relevant format info
|
||||
result_formats = []
|
||||
for fmt in formats:
|
||||
format_info = {
|
||||
"format_id": fmt.get("format_id", ""),
|
||||
"format": fmt.get("format", ""),
|
||||
"ext": fmt.get("ext", ""),
|
||||
"resolution": fmt.get("resolution", ""),
|
||||
"width": fmt.get("width"),
|
||||
"height": fmt.get("height"),
|
||||
"fps": fmt.get("fps"),
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"tbr": fmt.get("tbr"), # Total bitrate
|
||||
}
|
||||
result_formats.append(format_info)
|
||||
|
||||
debug(f"Found {len(result_formats)} available formats")
|
||||
return result_formats
|
||||
|
||||
except Exception as e:
|
||||
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
|
||||
return None
|
||||
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
"""Build yt-dlp download options."""
|
||||
ensure_directory(opts.output_dir)
|
||||
|
||||
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
|
||||
|
||||
base_options: Dict[str, Any] = {
|
||||
"outtmpl": outtmpl,
|
||||
"quiet": False,
|
||||
"no_warnings": False,
|
||||
"noprogress": False,
|
||||
"socket_timeout": 30,
|
||||
"retries": 10,
|
||||
"fragment_retries": 10,
|
||||
"http_chunk_size": 10_485_760,
|
||||
"restrictfilenames": True,
|
||||
"progress_hooks": [_progress_callback],
|
||||
}
|
||||
|
||||
if opts.cookies_path and opts.cookies_path.is_file():
|
||||
base_options["cookiefile"] = str(opts.cookies_path)
|
||||
|
||||
# Add no-playlist option if specified (for single video from playlist URLs)
|
||||
if opts.no_playlist:
|
||||
base_options["noplaylist"] = True
|
||||
|
||||
# Configure based on mode
|
||||
if opts.mode == "audio":
|
||||
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
|
||||
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
|
||||
else: # video
|
||||
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
|
||||
base_options["format_sort"] = [
|
||||
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
|
||||
]
|
||||
|
||||
# Add clip sections if provided
|
||||
if opts.clip_sections:
|
||||
base_options["download_sections"] = opts.clip_sections
|
||||
|
||||
# Add playlist items selection if provided
|
||||
if opts.playlist_items:
|
||||
base_options["playlist_items"] = opts.playlist_items
|
||||
|
||||
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
|
||||
return base_options
|
||||
|
||||
|
||||
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate through download entries, handling playlists."""
|
||||
queue: List[Dict[str, Any]] = [info]
|
||||
seen: set[int] = set()
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
obj_id = id(current)
|
||||
if obj_id in seen:
|
||||
continue
|
||||
seen.add(obj_id)
|
||||
entries = current.get("entries")
|
||||
if isinstance(entries, list):
|
||||
for entry in entries:
|
||||
if isinstance(entry, dict):
|
||||
queue.append(entry)
|
||||
if current.get("requested_downloads") or not entries:
|
||||
yield current
|
||||
|
||||
|
||||
def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
|
||||
"""Get candidate file paths for downloaded media."""
|
||||
requested = entry.get("requested_downloads")
|
||||
if isinstance(requested, list):
|
||||
for item in requested:
|
||||
if isinstance(item, dict):
|
||||
for key in ("filepath", "_filename", "filename"):
|
||||
value = item.get(key)
|
||||
if value:
|
||||
yield Path(value)
|
||||
for key in ("filepath", "_filename", "filename"):
|
||||
value = entry.get(key)
|
||||
if value:
|
||||
yield Path(value)
|
||||
if entry.get("filename"):
|
||||
yield output_dir / entry["filename"]
|
||||
|
||||
|
||||
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
|
||||
"""Find downloaded file in yt-dlp metadata."""
|
||||
for entry in _iter_download_entries(info):
|
||||
for candidate in _candidate_paths(entry, output_dir):
|
||||
if candidate.is_file():
|
||||
return entry, candidate
|
||||
if not candidate.is_absolute():
|
||||
resolved = output_dir / candidate
|
||||
if resolved.is_file():
|
||||
return entry, resolved
|
||||
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
|
||||
|
||||
|
||||
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||
"""Extract SHA256 hash from yt-dlp metadata."""
|
||||
for payload in [info] + info.get("entries", []):
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
hashes = payload.get("hashes")
|
||||
if isinstance(hashes, dict):
|
||||
for key in ("sha256", "sha-256", "sha_256"):
|
||||
value = hashes.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip().lower()
|
||||
for key in ("sha256", "sha-256", "sha_256"):
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip().lower()
|
||||
return None
|
||||
|
||||
|
||||
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
"""Extract the actual download link from LibGen redirect URL.
|
||||
|
||||
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to
|
||||
actual mirror URLs. This follows the redirect chain to get the real file.
|
||||
|
||||
Args:
|
||||
libgen_url: LibGen file.php URL
|
||||
|
||||
Returns:
|
||||
Actual download URL or None if extraction fails
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Check if this is a LibGen URL
|
||||
parsed = urlparse(libgen_url)
|
||||
if 'libgen' not in parsed.netloc.lower():
|
||||
return None
|
||||
|
||||
if '/file.php' not in parsed.path.lower():
|
||||
return None
|
||||
|
||||
# LibGen redirects to actual mirrors, follow redirects to get final URL
|
||||
session = requests.Session()
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||||
})
|
||||
|
||||
debug(f"Following LibGen redirect chain for: {libgen_url}")
|
||||
|
||||
# First, get the page and look for direct download link
|
||||
try:
|
||||
response = session.get(libgen_url, timeout=10, allow_redirects=True)
|
||||
final_url = response.url
|
||||
|
||||
# Try to find actual download link in the page
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
|
||||
# Look for download links - LibGen typically has forms with download buttons
|
||||
# Look for all links and forms that might lead to download
|
||||
for link in soup.find_all('a'):
|
||||
href = link.get('href')
|
||||
if href and isinstance(href, str):
|
||||
# Look for direct file links or get.php redirects
|
||||
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
|
||||
download_url = href if href.startswith('http') else urljoin(final_url, href)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
except ImportError:
|
||||
pass # BeautifulSoup not available
|
||||
|
||||
# If we followed redirects successfully, return the final URL
|
||||
# This handles cases where libgen redirects to a direct download mirror
|
||||
if final_url != libgen_url:
|
||||
debug(f"LibGen resolved to mirror: {final_url}")
|
||||
return final_url
|
||||
|
||||
except requests.RequestException as e:
|
||||
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
|
||||
# Try head request as fallback
|
||||
try:
|
||||
response = session.head(libgen_url, allow_redirects=True, timeout=10)
|
||||
if response.url != libgen_url:
|
||||
debug(f"LibGen HEAD resolved to: {response.url}")
|
||||
return response.url
|
||||
except:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def _download_direct_file(
|
||||
url: str,
|
||||
output_dir: Path,
|
||||
debug_logger: Optional[DebugLogger] = None,
|
||||
) -> DownloadMediaResult:
|
||||
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
|
||||
ensure_directory(output_dir)
|
||||
|
||||
from urllib.parse import unquote, urlparse, parse_qs
|
||||
import re
|
||||
|
||||
# Extract filename from URL
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
|
||||
# Try to get filename from query parameters first (for LibGen and similar services)
|
||||
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
|
||||
filename = None
|
||||
if parsed_url.query:
|
||||
query_params = parse_qs(parsed_url.query)
|
||||
for param_name in ('filename', 'download', 'file', 'name'):
|
||||
if param_name in query_params and query_params[param_name]:
|
||||
filename = query_params[param_name][0]
|
||||
filename = unquote(filename)
|
||||
break
|
||||
|
||||
# If not found in query params, extract from URL path
|
||||
if not filename or not filename.strip():
|
||||
filename = url_path.split("/")[-1] if url_path else ""
|
||||
filename = unquote(filename)
|
||||
|
||||
# Remove query strings from filename if any
|
||||
if "?" in filename:
|
||||
filename = filename.split("?")[0]
|
||||
|
||||
# Try to get real filename from Content-Disposition header (HEAD request)
|
||||
try:
|
||||
with HTTPClient(timeout=10.0) as client:
|
||||
response = client._request("HEAD", url, follow_redirects=True)
|
||||
content_disposition = response.headers.get("content-disposition", "")
|
||||
if content_disposition:
|
||||
# Extract filename from Content-Disposition header
|
||||
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
|
||||
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
|
||||
if match:
|
||||
extracted_name = match.group(1) or match.group(2)
|
||||
if extracted_name:
|
||||
filename = unquote(extracted_name)
|
||||
debug(f"Filename from Content-Disposition: {filename}")
|
||||
except Exception as e:
|
||||
log(f"Could not get filename from headers: {e}", file=sys.stderr)
|
||||
|
||||
# Fallback if we still don't have a good filename
|
||||
if not filename or "." not in filename:
|
||||
filename = "downloaded_file.bin"
|
||||
|
||||
file_path = output_dir / filename
|
||||
progress_bar = ProgressBar()
|
||||
|
||||
debug(f"Direct download: {filename}")
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
downloaded_bytes = [0]
|
||||
total_bytes = [0]
|
||||
last_progress_time = [start_time]
|
||||
|
||||
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
|
||||
downloaded_bytes[0] = bytes_downloaded
|
||||
total_bytes[0] = content_length
|
||||
|
||||
now = time.time()
|
||||
if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
|
||||
elapsed = now - start_time
|
||||
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
|
||||
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
|
||||
eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0
|
||||
|
||||
speed_str = progress_bar.format_bytes(speed) + "/s"
|
||||
minutes, seconds = divmod(int(eta_seconds), 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||
|
||||
progress_line = progress_bar.format_progress(
|
||||
percent_str=f"{percent:.1f}%",
|
||||
downloaded=bytes_downloaded,
|
||||
total=content_length,
|
||||
speed_str=speed_str,
|
||||
eta_str=eta_str,
|
||||
)
|
||||
debug(progress_line)
|
||||
last_progress_time[0] = now
|
||||
|
||||
with HTTPClient(timeout=30.0) as client:
|
||||
client.download(url, str(file_path), progress_callback=progress_callback)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
|
||||
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
|
||||
|
||||
# For direct file downloads, create minimal info dict without filename as title
|
||||
# This prevents creating duplicate title: tags when filename gets auto-generated
|
||||
# We'll add title back later only if we couldn't extract meaningful tags
|
||||
info = {
|
||||
"id": filename.rsplit(".", 1)[0],
|
||||
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
|
||||
"webpage_url": url,
|
||||
}
|
||||
|
||||
hash_value = None
|
||||
try:
|
||||
hash_value = sha256_file(file_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(info)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||
|
||||
# Only use filename as a title tag if we couldn't extract any meaningful tags
|
||||
# This prevents duplicate title: tags when the filename could be mistaken for metadata
|
||||
if not any(t.startswith('title:') for t in tags):
|
||||
# Re-extract tags with filename as title only if needed
|
||||
info['title'] = filename
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(info)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags with filename: {e}", file=sys.stderr)
|
||||
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"direct-file-downloaded",
|
||||
{"url": url, "path": str(file_path), "hash": hash_value},
|
||||
)
|
||||
|
||||
return DownloadMediaResult(
|
||||
path=file_path,
|
||||
info=info,
|
||||
tags=tags,
|
||||
source_url=url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
|
||||
except (httpx.HTTPError, httpx.RequestError) as exc:
|
||||
log(f"Download error: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "direct-file", "url": url, "error": str(exc)},
|
||||
)
|
||||
raise DownloadError(f"Failed to download {url}: {exc}") from exc
|
||||
except Exception as exc:
|
||||
log(f"Error downloading file: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{
|
||||
"phase": "direct-file",
|
||||
"url": url,
|
||||
"error": str(exc),
|
||||
"traceback": traceback.format_exc(),
|
||||
},
|
||||
)
|
||||
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
def probe_url(url: str, no_playlist: bool = False) -> Optional[Dict[str, Any]]:
|
||||
"""Probe URL to extract metadata WITHOUT downloading.
|
||||
|
||||
Args:
|
||||
url: URL to probe
|
||||
no_playlist: If True, ignore playlists and probe only the single video
|
||||
|
||||
Returns:
|
||||
Dict with keys: extractor, title, entries (if playlist), duration, etc.
|
||||
Returns None if not supported by yt-dlp.
|
||||
"""
|
||||
if not is_url_supported_by_ytdlp(url):
|
||||
return None
|
||||
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
assert yt_dlp is not None
|
||||
try:
|
||||
# Extract info without downloading
|
||||
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||
ydl_opts = {
|
||||
"quiet": True, # Suppress all output
|
||||
"no_warnings": True,
|
||||
"socket_timeout": 10,
|
||||
"retries": 3,
|
||||
"skip_download": True, # Don't actually download
|
||||
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||
"noprogress": True, # No progress bars
|
||||
"quiet": True,
|
||||
}
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
if not isinstance(info, dict):
|
||||
return None
|
||||
|
||||
# Extract relevant fields
|
||||
return {
|
||||
"extractor": info.get("extractor", ""),
|
||||
"title": info.get("title", ""),
|
||||
"entries": info.get("entries", []), # Will be populated if playlist
|
||||
"duration": info.get("duration"),
|
||||
"uploader": info.get("uploader"),
|
||||
"description": info.get("description"),
|
||||
"url": url,
|
||||
}
|
||||
except Exception as exc:
|
||||
log(f"Probe failed for {url}: {exc}")
|
||||
return None
|
||||
|
||||
|
||||
def download_media(
|
||||
opts: DownloadOptions,
|
||||
*,
|
||||
debug_logger: Optional[DebugLogger] = None,
|
||||
) -> DownloadMediaResult:
|
||||
"""Download media from URL using yt-dlp or direct HTTP download.
|
||||
|
||||
Args:
|
||||
opts: DownloadOptions with url, mode, output_dir, etc.
|
||||
debug_logger: Optional debug logger for troubleshooting
|
||||
|
||||
Returns:
|
||||
DownloadMediaResult with path, info, tags, hash
|
||||
|
||||
Raises:
|
||||
DownloadError: If download fails
|
||||
"""
|
||||
# Handle LibGen URLs specially
|
||||
# file.php redirects to mirrors, get.php is direct from modern API
|
||||
if 'libgen' in opts.url.lower():
|
||||
if '/get.php' in opts.url.lower():
|
||||
# Modern API get.php links are direct downloads from mirrors (not file redirects)
|
||||
log(f"Detected LibGen get.php URL, downloading directly...")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-direct", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
elif '/file.php' in opts.url.lower():
|
||||
# Old-style file.php redirects to mirrors, we need to resolve
|
||||
log(f"Detected LibGen file.php URL, resolving to actual mirror...")
|
||||
actual_url = _get_libgen_download_url(opts.url)
|
||||
if actual_url and actual_url != opts.url:
|
||||
log(f"Resolved LibGen URL to mirror: {actual_url}")
|
||||
opts.url = actual_url
|
||||
# After resolution, this will typically be an onion link or direct file
|
||||
# Skip yt-dlp for this (it won't support onion/mirrors), go direct
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
else:
|
||||
log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
|
||||
# Try yt-dlp first if URL is supported
|
||||
if not is_url_supported_by_ytdlp(opts.url):
|
||||
log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("direct-file-attempt", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
ytdl_options = _build_ytdlp_options(opts)
|
||||
log(f"Starting yt-dlp download: {opts.url}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("ytdlp-start", {"url": opts.url})
|
||||
|
||||
assert yt_dlp is not None
|
||||
try:
|
||||
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(opts.url, download=True)
|
||||
except Exception as exc:
|
||||
log(f"yt-dlp failed: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{
|
||||
"phase": "yt-dlp",
|
||||
"error": str(exc),
|
||||
"traceback": traceback.format_exc(),
|
||||
},
|
||||
)
|
||||
raise DownloadError("yt-dlp download failed") from exc
|
||||
|
||||
if not isinstance(info, dict):
|
||||
log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
|
||||
raise DownloadError("Unexpected yt-dlp response type")
|
||||
|
||||
info_dict: Dict[str, Any] = info
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"ytdlp-info",
|
||||
{
|
||||
"keys": sorted(info_dict.keys()),
|
||||
"is_playlist": bool(info_dict.get("entries")),
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
|
||||
except FileNotFoundError as exc:
|
||||
log(f"Error: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "resolve-path", "error": str(exc)},
|
||||
)
|
||||
raise DownloadError(str(exc)) from exc
|
||||
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"resolved-media",
|
||||
{"path": str(media_path), "entry_keys": sorted(entry.keys())},
|
||||
)
|
||||
|
||||
# Extract hash from metadata or compute
|
||||
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
|
||||
if not hash_value:
|
||||
try:
|
||||
hash_value = sha256_file(media_path)
|
||||
except OSError as exc:
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"hash-error",
|
||||
{"path": str(media_path), "error": str(exc)},
|
||||
)
|
||||
|
||||
# Extract tags using metadata.py
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(entry)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||
|
||||
source_url = (
|
||||
entry.get("webpage_url")
|
||||
or entry.get("original_url")
|
||||
or entry.get("url")
|
||||
)
|
||||
|
||||
log(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"downloaded",
|
||||
{
|
||||
"path": str(media_path),
|
||||
"tag_count": len(tags),
|
||||
"source_url": source_url,
|
||||
"sha256": hash_value,
|
||||
},
|
||||
)
|
||||
|
||||
return DownloadMediaResult(
|
||||
path=media_path,
|
||||
info=entry,
|
||||
tags=tags,
|
||||
source_url=source_url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"download_media",
|
||||
"is_url_supported_by_ytdlp",
|
||||
"DownloadError",
|
||||
"DownloadOptions",
|
||||
"DownloadMediaResult",
|
||||
]
|
||||
180
helper/file_server.py
Normal file
180
helper/file_server.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Simple HTTP file server for serving files in web mode."""
|
||||
|
||||
import threading
|
||||
import socket
|
||||
import logging
|
||||
from http.server import HTTPServer, SimpleHTTPRequestHandler
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import mimetypes
|
||||
import urllib.parse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Global server instance
|
||||
_file_server: Optional[HTTPServer] = None
|
||||
_server_thread: Optional[threading.Thread] = None
|
||||
_server_port: int = 8001
|
||||
|
||||
|
||||
class FileServerHandler(SimpleHTTPRequestHandler):
|
||||
"""HTTP request handler for file serving."""
|
||||
|
||||
def do_GET(self):
|
||||
"""Handle GET requests."""
|
||||
# Parse the path
|
||||
parsed_path = urllib.parse.urlparse(self.path)
|
||||
file_path = urllib.parse.unquote(parsed_path.path)
|
||||
|
||||
# Remove leading slash
|
||||
if file_path.startswith('/'):
|
||||
file_path = file_path[1:]
|
||||
|
||||
# Decode the file path (it's URL encoded)
|
||||
try:
|
||||
full_path = Path(file_path).resolve()
|
||||
|
||||
# Security check: ensure the path is within allowed directories
|
||||
# For now, allow all paths (can be restricted later)
|
||||
|
||||
if full_path.is_file() and full_path.exists():
|
||||
# Serve the file
|
||||
logger.debug(f"Serving file: {full_path}")
|
||||
|
||||
# Determine content type
|
||||
content_type, _ = mimetypes.guess_type(str(full_path))
|
||||
if content_type is None:
|
||||
content_type = 'application/octet-stream'
|
||||
|
||||
try:
|
||||
with open(full_path, 'rb') as f:
|
||||
file_content = f.read()
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', content_type)
|
||||
self.send_header('Content-Length', str(len(file_content)))
|
||||
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
|
||||
self.end_headers()
|
||||
self.wfile.write(file_content)
|
||||
logger.info(f"Successfully served file: {full_path.name}")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"Error serving file: {e}")
|
||||
self.send_error(500, "Internal server error")
|
||||
return
|
||||
else:
|
||||
logger.warning(f"File not found: {full_path}")
|
||||
self.send_error(404, "File not found")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling request: {e}")
|
||||
self.send_error(400, "Bad request")
|
||||
|
||||
def log_message(self, format, *args):
|
||||
"""Override to use our logger instead of stderr."""
|
||||
logger.debug(format % args)
|
||||
|
||||
|
||||
def get_local_ip() -> Optional[str]:
|
||||
"""Get the local IP address that's accessible from other devices."""
|
||||
try:
|
||||
# Connect to a remote server to determine local IP
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect(("8.8.8.8", 80))
|
||||
ip = s.getsockname()[0]
|
||||
s.close()
|
||||
return ip
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to determine local IP: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def start_file_server(port: int = 8001) -> Optional[str]:
|
||||
"""Start the HTTP file server.
|
||||
|
||||
Args:
|
||||
port: Port to serve on
|
||||
|
||||
Returns:
|
||||
Server URL if successful, None otherwise
|
||||
"""
|
||||
global _file_server, _server_thread, _server_port
|
||||
|
||||
if _file_server is not None:
|
||||
logger.debug(f"File server already running on port {_server_port}")
|
||||
local_ip = get_local_ip()
|
||||
if local_ip:
|
||||
return f"http://{local_ip}:{_server_port}"
|
||||
return None
|
||||
|
||||
try:
|
||||
_server_port = port
|
||||
|
||||
# Create server
|
||||
server_address = ('', port)
|
||||
_file_server = HTTPServer(server_address, FileServerHandler)
|
||||
|
||||
# Start in daemon thread
|
||||
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
|
||||
_server_thread.start()
|
||||
|
||||
logger.info(f"File server started on port {port}")
|
||||
|
||||
# Get local IP
|
||||
local_ip = get_local_ip()
|
||||
if local_ip:
|
||||
server_url = f"http://{local_ip}:{port}"
|
||||
logger.info(f"File server accessible at: {server_url}")
|
||||
return server_url
|
||||
else:
|
||||
logger.warning("Could not determine local IP")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start file server: {e}")
|
||||
_file_server = None
|
||||
_server_thread = None
|
||||
return None
|
||||
|
||||
|
||||
def stop_file_server():
|
||||
"""Stop the HTTP file server."""
|
||||
global _file_server, _server_thread
|
||||
|
||||
if _file_server is not None:
|
||||
try:
|
||||
_file_server.shutdown()
|
||||
_file_server.server_close()
|
||||
logger.info("File server stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping file server: {e}")
|
||||
finally:
|
||||
_file_server = None
|
||||
_server_thread = None
|
||||
|
||||
|
||||
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
|
||||
"""Get the HTTP URL for a file.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
server_url: Base server URL (gets determined if None)
|
||||
|
||||
Returns:
|
||||
HTTP URL to the file, or None if server not running
|
||||
"""
|
||||
if not file_path.exists():
|
||||
logger.warning(f"File does not exist: {file_path}")
|
||||
return None
|
||||
|
||||
if server_url is None:
|
||||
local_ip = get_local_ip()
|
||||
if not local_ip:
|
||||
logger.error("Cannot determine local IP for file URL")
|
||||
return None
|
||||
server_url = f"http://{local_ip}:{_server_port}"
|
||||
|
||||
# URL encode the file path
|
||||
encoded_path = urllib.parse.quote(str(file_path.resolve()))
|
||||
return f"{server_url}/{encoded_path}"
|
||||
1039
helper/file_storage.py
Normal file
1039
helper/file_storage.py
Normal file
File diff suppressed because it is too large
Load Diff
579
helper/http_client.py
Normal file
579
helper/http_client.py
Normal file
@@ -0,0 +1,579 @@
|
||||
"""
|
||||
Unified HTTP client for downlow using httpx.
|
||||
|
||||
Provides synchronous and asynchronous HTTP operations with:
|
||||
- Automatic retries on transient failures
|
||||
- Configurable timeouts and headers
|
||||
- Built-in progress tracking for downloads
|
||||
- Request/response logging support
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import asyncio
|
||||
from typing import Optional, Dict, Any, Callable, BinaryIO
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default configuration
|
||||
DEFAULT_TIMEOUT = 30.0
|
||||
DEFAULT_RETRIES = 3
|
||||
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
|
||||
|
||||
class HTTPClient:
|
||||
"""Unified HTTP client with sync support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.Client] = None
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
self._client = httpx.Client(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit."""
|
||||
if self._client:
|
||||
self._client.close()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def put(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
content: Optional[Any] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a PUT request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
content: Raw content
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"PUT",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
content=content,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def delete(
|
||||
self,
|
||||
url: str,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a DELETE request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"DELETE",
|
||||
url,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a generic HTTP request.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(method, url, **kwargs)
|
||||
|
||||
def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
for chunk in response.iter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.Client.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except (httpx.RequestError, httpx.ConnectError) as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
class AsyncHTTPClient:
|
||||
"""Unified async HTTP client with asyncio support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize async HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Async context manager entry."""
|
||||
self._client = httpx.AsyncClient(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Async context manager exit."""
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
async def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
async def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
async def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL asynchronously with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async with self._request_stream("GET", url, headers=headers) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
async for chunk in response.aiter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
async def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.AsyncClient.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = await self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5) # Brief delay before retry
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
except (httpx.RequestError, httpx.ConnectError) as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
# Convenience function for quick sync requests
|
||||
def get(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick GET request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.get(url, **kwargs)
|
||||
|
||||
|
||||
def post(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick POST request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.post(url, **kwargs)
|
||||
|
||||
|
||||
def download(
|
||||
url: str,
|
||||
file_path: str,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
**kwargs
|
||||
) -> Path:
|
||||
"""Quick file download without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
|
||||
1553
helper/hydrus.py
Normal file
1553
helper/hydrus.py
Normal file
File diff suppressed because it is too large
Load Diff
377
helper/libgen_service.py
Normal file
377
helper/libgen_service.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""Shared Library Genesis search and download helpers."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional
|
||||
import logging
|
||||
import requests
|
||||
from urllib.parse import quote, urljoin
|
||||
|
||||
from libgen import search_sync, LibgenError
|
||||
|
||||
LogFn = Optional[Callable[[str], None]]
|
||||
ErrorFn = Optional[Callable[[str], None]]
|
||||
|
||||
DEFAULT_TIMEOUT = 10.0
|
||||
DEFAULT_LIMIT = 50
|
||||
|
||||
logging.getLogger(__name__).setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def _call(logger: LogFn, message: str) -> None:
|
||||
if logger:
|
||||
logger(message)
|
||||
|
||||
|
||||
def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen without triggering ads.php requests."""
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError: # pragma: no cover
|
||||
logging.warning("BeautifulSoup not available; falling back to standard search")
|
||||
return []
|
||||
|
||||
mirrors = [
|
||||
"https://libgen.gl",
|
||||
"https://libgen.vg",
|
||||
"https://libgen.la",
|
||||
"https://libgen.bz",
|
||||
"https://libgen.gs",
|
||||
]
|
||||
|
||||
session = session or requests.Session()
|
||||
session.headers.setdefault(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
)
|
||||
|
||||
for mirror in mirrors:
|
||||
try:
|
||||
search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
|
||||
response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
|
||||
if response.status_code != 200:
|
||||
continue
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
table = soup.find("table", {"class": "catalog"})
|
||||
if table is None:
|
||||
for candidate in soup.find_all("table"):
|
||||
rows = candidate.find_all("tr")
|
||||
if len(rows) > 2:
|
||||
table = candidate
|
||||
break
|
||||
if table is None:
|
||||
logging.debug("[libgen_no_ads] No results table on %s", mirror)
|
||||
continue
|
||||
|
||||
rows = table.find_all("tr")[1:]
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
try:
|
||||
cells = row.find_all("td")
|
||||
if len(cells) < 9:
|
||||
continue
|
||||
|
||||
size_cell = cells[7]
|
||||
file_link = size_cell.find("a")
|
||||
mirror_link = ""
|
||||
if file_link:
|
||||
href = str(file_link.get("href", ""))
|
||||
if href.startswith("/"):
|
||||
mirror_link = mirror + href
|
||||
elif href:
|
||||
mirror_link = urljoin(mirror, href)
|
||||
|
||||
if not mirror_link:
|
||||
title_link = cells[1].find("a") if len(cells) > 1 else None
|
||||
if title_link:
|
||||
href = str(title_link.get("href", ""))
|
||||
if href.startswith("/"):
|
||||
mirror_link = mirror + href
|
||||
elif href:
|
||||
mirror_link = urljoin(mirror, href)
|
||||
|
||||
if not mirror_link:
|
||||
continue
|
||||
|
||||
results.append(
|
||||
{
|
||||
"id": "",
|
||||
"mirror": mirror_link,
|
||||
"cover": "",
|
||||
"title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
|
||||
"authors": [cells[2].get_text(strip=True)]
|
||||
if len(cells) > 2
|
||||
else ["Unknown"],
|
||||
"publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
|
||||
"year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
|
||||
"pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
|
||||
"language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
|
||||
"size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
|
||||
"extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
|
||||
"isbn": "",
|
||||
}
|
||||
)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
|
||||
continue
|
||||
|
||||
if results:
|
||||
logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
|
||||
return results
|
||||
except Exception as exc: # pragma: no cover - mirror issues
|
||||
logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def format_book_info(book: Any) -> Dict[str, Any]:
|
||||
"""Format Libgen search result into a consistent dictionary."""
|
||||
filesize_bytes = 0
|
||||
size_str = getattr(book, "size", "") or ""
|
||||
if size_str:
|
||||
parts = size_str.strip().split()
|
||||
try:
|
||||
value = float(parts[0])
|
||||
unit = parts[1].upper() if len(parts) > 1 else "B"
|
||||
if unit in {"MB", "M"}:
|
||||
filesize_bytes = int(value * 1024 * 1024)
|
||||
elif unit in {"GB", "G"}:
|
||||
filesize_bytes = int(value * 1024 * 1024 * 1024)
|
||||
elif unit in {"KB", "K"}:
|
||||
filesize_bytes = int(value * 1024)
|
||||
else:
|
||||
filesize_bytes = int(value)
|
||||
except (ValueError, IndexError): # pragma: no cover - defensive
|
||||
filesize_bytes = 0
|
||||
|
||||
title = getattr(book, "title", "") or ""
|
||||
isbn = getattr(book, "isbn", "") or ""
|
||||
if not isbn and title:
|
||||
import re
|
||||
|
||||
match = re.search(
|
||||
r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
|
||||
title,
|
||||
)
|
||||
if match:
|
||||
potential_isbn = match.group(0).strip()
|
||||
if re.search(r"\d{10,13}", potential_isbn):
|
||||
isbn = potential_isbn
|
||||
title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)
|
||||
|
||||
authors_value = getattr(book, "authors", None)
|
||||
if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
|
||||
authors_str = ", ".join(str(author) for author in authors_value)
|
||||
else:
|
||||
authors_str = str(authors_value or "Unknown")
|
||||
|
||||
download_links = getattr(book, "download_links", None)
|
||||
mirror_url = None
|
||||
if download_links and getattr(download_links, "get_link", None):
|
||||
mirror_url = download_links.get_link
|
||||
|
||||
return {
|
||||
"title": title or "Unknown",
|
||||
"author": authors_str,
|
||||
"publisher": getattr(book, "publisher", "") or "",
|
||||
"year": getattr(book, "year", "") or "",
|
||||
"pages": getattr(book, "pages", "") or "",
|
||||
"language": getattr(book, "language", "") or "",
|
||||
"filesize": filesize_bytes,
|
||||
"filesize_str": size_str or "Unknown",
|
||||
"extension": getattr(book, "extension", "") or "",
|
||||
"isbn": isbn,
|
||||
"mirror_url": mirror_url,
|
||||
}
|
||||
|
||||
|
||||
def search_libgen(
|
||||
query: str,
|
||||
limit: int = DEFAULT_LIMIT,
|
||||
*,
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen returning formatted dictionaries with multiple mirrors.
|
||||
|
||||
Uses HTML scraper (search_libgen_no_ads) to find books quickly.
|
||||
Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
|
||||
"""
|
||||
try:
|
||||
_call(log_info, f"[search] Searching Libgen for: {query}")
|
||||
session = session or requests.Session()
|
||||
|
||||
# Use HTML scraper - more reliable and doesn't hang on mirror resolution
|
||||
        _call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
        results: List[Any] = search_libgen_no_ads(query, session=session)

        if not results:
            _call(log_info, "[search] No results from HTML scraper")
            return []

        formatted: List[Dict[str, Any]] = []
        mirrors_list = [
            "https://libgen.gl",
            "https://libgen.vg",
            "https://libgen.la",
            "https://libgen.bz",
            "https://libgen.gs",
        ]

        for book in results[:limit]:
            if isinstance(book, dict):
                # Result from search_libgen_no_ads (HTML scraper)
                authors = book.get("authors", ["Unknown"])
                if isinstance(authors, list):
                    author_value = ", ".join(str(a) for a in authors)
                else:
                    author_value = str(authors)

                # Extract book ID from mirror URL if available
                mirror = book.get("mirror", "")
                book_id = ""
                if mirror and "/file.php?id=" in mirror:
                    try:
                        book_id = mirror.split("/file.php?id=")[1].split("&")[0]
                    except (IndexError, ValueError):
                        pass

                # Build list of alternative mirrors based on book ID
                mirrors_dict = {}
                if book_id:
                    for mirror_base in mirrors_list:
                        mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
                elif mirror:
                    # Fallback: use the mirror we found
                    mirrors_dict["primary"] = mirror

                formatted.append(
                    {
                        "title": book.get("title", "Unknown"),
                        "author": author_value,
                        "publisher": book.get("publisher", ""),
                        "year": book.get("year", ""),
                        "pages": book.get("pages", ""),
                        "language": book.get("language", ""),
                        "filesize": 0,
                        "filesize_str": book.get("size", "Unknown"),
                        "extension": book.get("extension", ""),
                        "isbn": book.get("isbn", ""),
                        "mirror_url": mirror,  # Primary mirror
                        "mirrors": mirrors_dict,  # Alternative mirrors
                        "book_id": book_id,
                    }
                )
            else:
                # Fallback: try to format as book object
                try:
                    formatted.append(format_book_info(book))
                except Exception:
                    pass

        _call(log_info, f"[search] Found {len(formatted)} result(s)")
        return formatted
    except LibgenError as exc:
        _call(log_error, f"[search] Libgen error: {exc}")
        return []
    except Exception as exc:  # pragma: no cover - defensive
        _call(log_error, f"[search] Error: {exc}")
        return []


def download_from_mirror(
    mirror_url: str,
    output_path: str | Path,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
) -> bool:
    """Download a Libgen file and write it to disk.

    Handles Libgen redirects and ensures a proper file download by:
    - Following all redirects (default behaviour)
    - Setting a User-Agent header (required by some mirrors)
    - Validating that we received binary content, not HTML
    - Attempting an alternative download method if HTML is returned
    """
    session = session or requests.Session()
    try:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        _call(log_info, f"[download] Downloading from mirror: {mirror_url}")

        # Ensure session has proper headers for Libgen
        if 'User-Agent' not in session.headers:
            session.headers['User-Agent'] = (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )

        # Download with redirects enabled (default) and referer
        session.headers['Referer'] = 'https://libgen.gs/'
        response = session.get(mirror_url, stream=True, timeout=30, allow_redirects=True)
        response.raise_for_status()

        # Check if we got HTML instead of a file (common Libgen issue)
        content_type = response.headers.get('content-type', '').lower()
        if 'text/html' in content_type:
            _call(log_error, "[download] Server returned HTML. Trying alternative method...")

            # Try to extract a usable download link from the returned page
            try:
                # Parse the HTML to extract MD5 or file ID
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.text, 'html.parser')

                # Look for a download link in the HTML.
                # Common patterns: md5 hash in a form, or a direct link in anchor tags.
                download_link = None

                # Try to find forms that might contain download functionality
                forms = soup.find_all('form')
                for form in forms:
                    action = form.get('action', '')
                    if 'download' in action.lower() or 'get' in action.lower():
                        download_link = action
                        break

                if not download_link:
                    _call(log_error, "[download] Could not extract alternative download link from HTML")
                    return False

                _call(log_info, f"[download] Using alternative download method: {download_link[:100]}")
                # Try downloading from the alternative link
                response2 = session.get(download_link, stream=True, timeout=30, allow_redirects=True)
                response2.raise_for_status()
                response = response2  # Use the new response

            except Exception as alt_error:
                _call(log_error, f"[download] Alternative method failed: {alt_error}")
                return False

        total_size = int(response.headers.get("content-length", 0))
        downloaded = 0

        with open(output_path, "wb") as handle:
            for chunk in response.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                handle.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    percent = downloaded / total_size * 100
                    _call(
                        log_info,
                        f"[download] {percent:.1f}% - {downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB",
                    )

        _call(log_info, f"[download] Downloaded successfully to: {output_path}")
        return True
    except Exception as exc:  # pragma: no cover - defensive
        _call(log_error, f"[download] Error: {exc}")
        return False
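For context, a minimal sketch of how these two helpers are meant to compose: try the primary mirror first, then the alternates built from the book id. It assumes the search entry point earlier in this module is search_libgen(query, limit=...) as imported elsewhere in this commit; the query string and output directory are illustrative only.

from pathlib import Path

from helper.libgen_service import download_from_mirror, search_libgen

for book in search_libgen("Morals and Dogma", limit=5):
    # mirror_url is the scraped link; mirrors holds the per-domain alternates keyed by base URL
    candidates = [book["mirror_url"], *book["mirrors"].values()]
    target = Path("downloads") / f"{book['book_id'] or 'book'}.{book['extension'] or 'pdf'}"
    if any(download_from_mirror(url, target) for url in candidates if url):
        break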
1395
helper/local_library.py
Normal file
1395
helper/local_library.py
Normal file
File diff suppressed because it is too large
70
helper/logger.py
Normal file
70
helper/logger.py
Normal file
@@ -0,0 +1,70 @@
"""Unified logging utility for automatic file and function name tracking."""

import sys
import inspect
from pathlib import Path

_DEBUG_ENABLED = False


def set_debug(enabled: bool) -> None:
    """Enable or disable debug logging."""
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled


def debug(*args, **kwargs) -> None:
    """Print debug message if debug logging is enabled.

    Automatically prepends [filename.function_name] to all output.
    """
    if not _DEBUG_ENABLED:
        return

    # Set default to stderr for debug messages
    if 'file' not in kwargs:
        kwargs['file'] = sys.stderr

    # Prepend DEBUG label
    args = ("DEBUG:", *args)

    # Use the same logic as log()
    log(*args, **kwargs)


def log(*args, **kwargs) -> None:
    """Print with automatic file.function prefix.

    Automatically prepends [filename.function_name] to all output.
    Defaults to stdout if not specified.

    Example:
        log("Upload started")  # Output: [add_file.run] Upload started
    """
    # Get the calling frame
    frame = inspect.currentframe()
    if frame is None:
        print(*args, **kwargs)
        return

    caller_frame = frame.f_back
    if caller_frame is None:
        print(*args, **kwargs)
        return

    try:
        # Get file name without extension
        file_name = Path(caller_frame.f_code.co_filename).stem

        # Get function name
        func_name = caller_frame.f_code.co_name

        # Set default to stdout if not specified
        if 'file' not in kwargs:
            kwargs['file'] = sys.stdout

        # Build prefix
        prefix = f"[{file_name}.{func_name}]"

        # Print with prefix
        print(prefix, *args, **kwargs)
    finally:
        del frame
        del caller_frame
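A short usage sketch (the calling module and function names are hypothetical). The prefix comes from the immediate caller's frame, so log() reports the calling module and function, while debug() delegates to log() and therefore shows up with a [logger.debug] prefix.

from helper.logger import set_debug, log, debug

set_debug(True)  # debug() is a no-op until this is called

def run_upload() -> None:
    log("upload started")       # e.g. "[add_file.run_upload] upload started" when defined in add_file.py
    debug("raw response", 200)  # e.g. "[logger.debug] DEBUG: raw response 200", written to stderr

run_upload()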
951
helper/mpv_file.py
Normal file
951
helper/mpv_file.py
Normal file
@@ -0,0 +1,951 @@
|
||||
"""MPV file metadata aggregation helpers."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
||||
from urllib.parse import parse_qs, urlparse, unquote
|
||||
|
||||
from config import get_hydrus_url
|
||||
from helper.utils import sha256_file, unique_preserve_order
|
||||
from helper.hydrus import HydrusClient, HydrusRequestError
|
||||
|
||||
import metadata
|
||||
|
||||
|
||||
class MPVFileError(RuntimeError):
|
||||
"""Raised when we cannot construct an MPV file snapshot."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DebridMagnet:
|
||||
"""Represents a magnet result from AllDebrid search.
|
||||
|
||||
This class matches the structure expected by the TUI (like Hydrus results)
|
||||
with title, target, media_kind attributes for compatibility.
|
||||
"""
|
||||
magnet_id: str
|
||||
title: str
|
||||
size: int
|
||||
status_code: int
|
||||
status_text: str
|
||||
progress: float
|
||||
downloaded: int
|
||||
seeders: int
|
||||
dl_speed: int
|
||||
tag_summary: Optional[str] = None
|
||||
metadata: Optional[Dict[str, Any]] = None # Complete magnet file metadata from AllDebrid API
|
||||
|
||||
@property
|
||||
def target(self) -> str:
|
||||
"""Return the target URI for this magnet (used by TUI for access operations)."""
|
||||
return f"alldebrid://{self.magnet_id}"
|
||||
|
||||
@property
|
||||
def media_kind(self) -> str:
|
||||
"""Return media kind for display."""
|
||||
return "magnet"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for metadata display."""
|
||||
return {
|
||||
"magnet_id": self.magnet_id,
|
||||
"title": self.title,
|
||||
"size": self.size,
|
||||
"status_code": self.status_code,
|
||||
"status_text": self.status_text,
|
||||
"progress": f"{self.progress:.1f}%",
|
||||
"downloaded": self.downloaded,
|
||||
"seeders": self.seeders,
|
||||
"dl_speed": self.dl_speed,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class HydrusSettings:
|
||||
base_url: Optional[str]
|
||||
access_key: Optional[str]
|
||||
timeout: float
|
||||
prefer_service_name: Optional[str]
|
||||
include_relationships: bool
|
||||
|
||||
def as_metadata_options(self) -> Dict[str, Any]:
|
||||
options: Dict[str, Any] = {
|
||||
"timeout": self.timeout,
|
||||
"include_relationships": self.include_relationships,
|
||||
}
|
||||
if self.prefer_service_name:
|
||||
options["prefer_service_name"] = self.prefer_service_name
|
||||
return options
|
||||
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MPVfile:
|
||||
path: Optional[str] = None
|
||||
filename: Optional[str] = None
|
||||
type: str = "unknown"
|
||||
hash: Optional[str] = None
|
||||
local_path: Optional[str] = None
|
||||
mpv_metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
remote_metadata: Optional[Dict[str, Any]] = None
|
||||
relationships: Optional[Dict[str, Any]] = None
|
||||
relationship_metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
original_tags: Dict[str, str] = field(default_factory=dict)
|
||||
known_urls: List[str] = field(default_factory=list)
|
||||
title: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
clip_time: Optional[str] = None
|
||||
duration: Optional[float] = None
|
||||
filesize_mb: Optional[float] = None
|
||||
is_video: bool = False
|
||||
is_audio: bool = False
|
||||
is_deleted: Optional[bool] = None
|
||||
is_local: Optional[bool] = None
|
||||
has_current_file_service: Optional[bool] = None
|
||||
tag_service_key: Optional[str] = None
|
||||
swap_recommended: bool = False
|
||||
warnings: List[str] = field(default_factory=list)
|
||||
# New relationship fields for menu
|
||||
king: Optional[str] = None
|
||||
alts: List[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {
|
||||
"path": self.path,
|
||||
"filename": self.filename,
|
||||
"type": self.type,
|
||||
"hash": self.hash,
|
||||
"local_path": self.local_path,
|
||||
"mpv_metadata": self.mpv_metadata,
|
||||
"metadata": self.metadata,
|
||||
"remote_metadata": self.remote_metadata,
|
||||
"relationships": self.relationships,
|
||||
"relationship_metadata": self.relationship_metadata,
|
||||
"tags": self.tags,
|
||||
"original_tags": self.original_tags,
|
||||
"known_urls": self.known_urls,
|
||||
"title": self.title,
|
||||
"source_url": self.source_url,
|
||||
"clip_time": self.clip_time,
|
||||
"duration": self.duration,
|
||||
"filesize_mb": self.filesize_mb,
|
||||
"is_video": self.is_video,
|
||||
"is_audio": self.is_audio,
|
||||
"is_deleted": self.is_deleted,
|
||||
"is_local": self.is_local,
|
||||
"has_current_file_service": self.has_current_file_service,
|
||||
"tag_service_key": self.tag_service_key,
|
||||
"swap_recommended": self.swap_recommended,
|
||||
"warnings": self.warnings,
|
||||
# relationship summary fields for easier Lua consumption
|
||||
"king": self.king,
|
||||
"alts": self.alts,
|
||||
}
|
||||
# Remove empty optional values for terser payloads.
|
||||
for key in list(payload.keys()):
|
||||
value = payload[key]
|
||||
if value in (None, [], {}, ""):
|
||||
del payload[key]
|
||||
return payload
|
||||
|
||||
|
||||
def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
|
||||
if not values:
|
||||
return []
|
||||
seen: set[str] = set()
|
||||
result: List[str] = []
|
||||
for value in values:
|
||||
if value is None:
|
||||
continue
|
||||
text = str(value).strip()
|
||||
if not text or text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
result.append(text)
|
||||
return result
|
||||
|
||||
|
||||
def _looks_like_hash(value: Optional[str]) -> bool:
|
||||
if not value:
|
||||
return False
|
||||
candidate = value.strip().lower()
|
||||
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
|
||||
|
||||
|
||||
class MPVFileBuilder:
|
||||
def __init__(self, payload: Dict[str, Any], config: Dict[str, Any]):
|
||||
self.payload = payload or {}
|
||||
self.config = config or {}
|
||||
self.state = MPVfile()
|
||||
self.hydrus_settings = self._resolve_hydrus_settings()
|
||||
self.remote_options = self._resolve_remote_options()
|
||||
self.include_relationships = bool(self.payload.get("include_relationships", True))
|
||||
self.last_url = self._normalise_url(self.payload.get("last_url"))
|
||||
self._initialise_identity()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def build(self) -> Dict[str, Any]:
|
||||
if self.state.type == "hydrus":
|
||||
self._populate_hydrus_by_hash()
|
||||
elif self.state.type == "local":
|
||||
self._populate_local()
|
||||
elif self.state.type == "remote":
|
||||
self._populate_remote()
|
||||
else:
|
||||
# Attempt best effort resolution even for unknown types.
|
||||
self._populate_local(best_effort=True)
|
||||
self._finalise()
|
||||
result = self.state.to_dict()
|
||||
# Append King and Alts info to mpv_metadata for info menu
|
||||
king = self.state.king
|
||||
alts = self.state.alts
|
||||
if king:
|
||||
result.setdefault("mpv_metadata", {})["King"] = king
|
||||
if alts:
|
||||
result.setdefault("mpv_metadata", {})["Alts"] = ", ".join(alts)
|
||||
return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# configuration helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _resolve_hydrus_settings(self) -> HydrusSettings:
|
||||
overrides = self.payload.get("hydrus")
|
||||
overrides = overrides if isinstance(overrides, dict) else {}
|
||||
base_url = overrides.get("url") or overrides.get("base_url")
|
||||
access_key = overrides.get("access_key")
|
||||
timeout_raw = overrides.get("timeout") or overrides.get("hydrus_timeout")
|
||||
prefer_service = overrides.get("prefer_service_name")
|
||||
include_relationships = overrides.get("include_relationships")
|
||||
if base_url is None:
|
||||
base_url = get_hydrus_url(self.config)
|
||||
if access_key is None:
|
||||
raw_key = self.config.get("HydrusNetwork_Access_Key")
|
||||
access_key = str(raw_key) if raw_key is not None else None
|
||||
if timeout_raw is None:
|
||||
timeout_raw = self.config.get("HydrusNetwork_Request_Timeout")
|
||||
try:
|
||||
timeout = float(timeout_raw) if timeout_raw is not None else 60.0
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
if prefer_service is None:
|
||||
prefer_service = self.config.get("Hydrus_Tag_Service")
|
||||
if isinstance(prefer_service, str):
|
||||
prefer_service = prefer_service.strip() or None
|
||||
if include_relationships is None:
|
||||
include_relationships = self.payload.get("include_relationships")
|
||||
include_relationships = bool(True if include_relationships is None else include_relationships)
|
||||
base_url = base_url.strip() if isinstance(base_url, str) else None
|
||||
access_key = access_key.strip() if isinstance(access_key, str) else None
|
||||
return HydrusSettings(
|
||||
base_url=base_url or None,
|
||||
access_key=access_key or None,
|
||||
timeout=timeout,
|
||||
prefer_service_name=prefer_service,
|
||||
include_relationships=include_relationships,
|
||||
)
|
||||
|
||||
def _resolve_remote_options(self) -> Dict[str, Any]:
|
||||
remote_payload = self.payload.get("remote")
|
||||
remote_payload = remote_payload if isinstance(remote_payload, dict) else {}
|
||||
options = remote_payload.get("options")
|
||||
options = options if isinstance(options, dict) else {}
|
||||
ytdlp_args = options.get("ytdlp_args")
|
||||
if not ytdlp_args:
|
||||
options["ytdlp_args"] = ["--no-playlist", "--skip-download", "--no-warnings"]
|
||||
existing_timeout = options.get("timeout")
|
||||
if existing_timeout is None:
|
||||
options["timeout"] = min(90.0, max(10.0, float(self.payload.get("remote_timeout") or 45.0)))
|
||||
return options
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# initialisation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _initialise_identity(self) -> None:
|
||||
s = self.state
|
||||
p = self.payload
|
||||
|
||||
def _str_or_none(v):
|
||||
return str(v) if v is not None and v != "" else None
|
||||
|
||||
def _copy_dict_if_dict(v):
|
||||
return dict(v) if isinstance(v, dict) else {}
|
||||
|
||||
# path and filename
|
||||
s.path = _str_or_none(p.get("path"))
|
||||
s.filename = _str_or_none(p.get("filename"))
|
||||
|
||||
# mpv metadata
|
||||
s.mpv_metadata = _copy_dict_if_dict(p.get("mpv_metadata"))
|
||||
|
||||
# tags (support both "tags" and legacy "existing_tags")
|
||||
existing_tags = p.get("tags") or p.get("existing_tags")
|
||||
s.tags = _normalise_string_list(existing_tags)
|
||||
if s.tags:
|
||||
s.original_tags = {tag: tag for tag in s.tags}
|
||||
|
||||
# known URLs + last_url
|
||||
s.known_urls = _normalise_string_list(p.get("known_urls"))
|
||||
if self.last_url and self.last_url not in s.known_urls:
|
||||
s.known_urls.append(self.last_url)
|
||||
|
||||
# source URL (explicit or fallback to last_url)
|
||||
explicit_source = p.get("source_url")
|
||||
s.source_url = self._normalise_url(explicit_source) or self.last_url
|
||||
|
||||
# hash (validate looks-like-hash)
|
||||
hash_candidate = p.get("hash")
|
||||
if isinstance(hash_candidate, str):
|
||||
candidate = hash_candidate.strip().lower()
|
||||
if _looks_like_hash(candidate):
|
||||
s.hash = candidate
|
||||
|
||||
# local_path (non-empty string)
|
||||
local_path_override = p.get("local_path")
|
||||
if isinstance(local_path_override, str):
|
||||
lp = local_path_override.strip()
|
||||
if lp:
|
||||
s.local_path = lp
|
||||
|
||||
# derive remaining fields from path/filename/type
|
||||
self._derive_filename_from_path()
|
||||
self._determine_type()
|
||||
|
||||
|
||||
def _derive_filename_from_path(self) -> None:
|
||||
if self.state.filename or not self.state.path:
|
||||
return
|
||||
parsed = urlparse(self.state.path)
|
||||
if parsed.scheme in ("http", "https", "ytdl") and parsed.path:
|
||||
candidate = Path(parsed.path).name
|
||||
if candidate:
|
||||
self.state.filename = candidate
|
||||
elif parsed.scheme == "file":
|
||||
decoded = self._decode_file_url(self.state.path)
|
||||
if decoded:
|
||||
self.state.filename = Path(decoded).name
|
||||
else:
|
||||
try:
|
||||
self.state.filename = Path(self.state.path).name
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _determine_type(self) -> None:
|
||||
s = self.state
|
||||
p = self.payload
|
||||
|
||||
def _set_local_from_path(pth: str | None):
|
||||
if not pth:
|
||||
return
|
||||
# Prefer resolved local path when available
|
||||
resolved = self._resolve_local_path(pth)
|
||||
s.local_path = resolved if resolved else pth
|
||||
s.type = "local"
|
||||
|
||||
# 1) Respect explicit type when valid
|
||||
explicit = p.get("type")
|
||||
if isinstance(explicit, str):
|
||||
lowered = explicit.strip().lower()
|
||||
if lowered in {"local", "hydrus", "remote"}:
|
||||
s.type = lowered
|
||||
if lowered == "local":
|
||||
s.local_path = self._resolve_local_path(s.path)
|
||||
return
|
||||
|
||||
# 2) Work from path
|
||||
path = s.path or ""
|
||||
if not path:
|
||||
s.type = "unknown"
|
||||
return
|
||||
|
||||
# 3) Hydrus-specific quick checks
|
||||
if self._looks_like_hydrus_url(path):
|
||||
s.type = "hydrus"
|
||||
return
|
||||
|
||||
parsed = urlparse(path)
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
|
||||
# 4) scheme-based handling
|
||||
if scheme == "hydrus":
|
||||
s.type = "hydrus"
|
||||
return
|
||||
|
||||
if scheme in {"http", "https", "rtmp", "rtsp", "magnet", "ytdl"}:
|
||||
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||
return
|
||||
|
||||
if scheme == "file":
|
||||
decoded = self._decode_file_url(path)
|
||||
if decoded:
|
||||
s.local_path = decoded
|
||||
s.type = "local"
|
||||
return
|
||||
|
||||
# 5) Windows/UNC absolute paths
|
||||
if re.match(r"^[A-Za-z]:[\\/]", path) or path.startswith(("\\\\", "//")):
|
||||
s.type = "local"
|
||||
s.local_path = path
|
||||
return
|
||||
|
||||
# 6) Fallback: if it looks like a URL with a scheme separator treat as remote/hydrus
|
||||
if "://" in path:
|
||||
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||
return
|
||||
|
||||
# 7) Otherwise treat as a local path
|
||||
_set_local_from_path(path)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# population helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _populate_local(self, best_effort: bool = False) -> None:
|
||||
local_path = self.state.local_path or self._resolve_local_path(self.state.path)
|
||||
if local_path:
|
||||
self.state.local_path = local_path
|
||||
self._load_sidecar_tags(local_path)
|
||||
if not self.state.hash:
|
||||
self._compute_local_hash(local_path)
|
||||
# If Hydrus is configured and we have a hash, enrich from Hydrus; otherwise keep local tags only
|
||||
if self.state.hash and self.hydrus_settings.base_url and self.hydrus_settings.access_key:
|
||||
self._populate_hydrus_by_hash()
|
||||
elif best_effort and self.hydrus_settings.base_url and self.state.source_url and self.hydrus_settings.access_key:
|
||||
self._populate_hydrus_by_url(self.state.source_url)
|
||||
|
||||
# (helpers for resolving local path and loading sidecars already exist below)
|
||||
|
||||
def _populate_remote(self) -> None:
|
||||
source_url = self.state.source_url or self.last_url or self.state.path
|
||||
source_url = self._normalise_url(source_url)
|
||||
if source_url:
|
||||
self.state.source_url = source_url
|
||||
remote_payload = {
|
||||
"source_url": self.state.source_url,
|
||||
"existing_tags": self.state.tags,
|
||||
"metadata": self.payload.get("remote_metadata"),
|
||||
"mpv_metadata": self.state.mpv_metadata,
|
||||
"options": self.remote_options,
|
||||
}
|
||||
try:
|
||||
remote_result = metadata.resolve_remote_metadata(remote_payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to the caller
|
||||
self.state.warnings.append(str(exc))
|
||||
remote_result = None
|
||||
if remote_result:
|
||||
tags = remote_result.get("tags") or []
|
||||
self._merge_tags(tags)
|
||||
self.state.remote_metadata = remote_result.get("metadata")
|
||||
self.state.title = remote_result.get("title") or self.state.title
|
||||
self.state.duration = remote_result.get("duration") or self.state.duration
|
||||
self.state.source_url = remote_result.get("source_url") or self.state.source_url
|
||||
warnings = remote_result.get("warnings") or []
|
||||
if warnings:
|
||||
self.state.warnings.extend(warnings)
|
||||
if self.hydrus_settings.base_url and self.state.source_url:
|
||||
self._populate_hydrus_by_url(self.state.source_url)
|
||||
|
||||
def _populate_hydrus_by_hash(self) -> None:
|
||||
hash_hex = self.state.hash or self._extract_hash_from_path(self.state.path)
|
||||
if hash_hex and not _looks_like_hash(hash_hex):
|
||||
hash_hex = None
|
||||
if not hash_hex:
|
||||
return
|
||||
self.state.hash = hash_hex
|
||||
if not self.hydrus_settings.base_url:
|
||||
return
|
||||
payload: Dict[str, Any] = {
|
||||
"api_url": self.hydrus_settings.base_url,
|
||||
"access_key": self.hydrus_settings.access_key or "",
|
||||
"options": self.hydrus_settings.as_metadata_options(),
|
||||
"hash": hash_hex,
|
||||
}
|
||||
try:
|
||||
result = metadata.fetch_hydrus_metadata(payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||
self.state.warnings.append(str(exc))
|
||||
return
|
||||
self._apply_hydrus_result(result)
|
||||
# Enrich relationships using the dedicated Hydrus endpoint (robust GET)
|
||||
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||
self._enrich_relationships_from_api(self.state.hash)
|
||||
|
||||
def _populate_hydrus_by_url(self, url: str) -> None:
|
||||
if not self.hydrus_settings.base_url:
|
||||
return
|
||||
payload: Dict[str, Any] = {
|
||||
"api_url": self.hydrus_settings.base_url,
|
||||
"access_key": self.hydrus_settings.access_key or "",
|
||||
"options": self.hydrus_settings.as_metadata_options(),
|
||||
"url": url,
|
||||
}
|
||||
try:
|
||||
result = metadata.fetch_hydrus_metadata_by_url(payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||
self.state.warnings.append(str(exc))
|
||||
return
|
||||
if result.get("error") == "not_found":
|
||||
self.state.warnings.extend(result.get("warnings") or [])
|
||||
return
|
||||
self._apply_hydrus_result(result)
|
||||
self.state.type = "hydrus"
|
||||
matched_url = result.get("matched_url") or result.get("url")
|
||||
if matched_url and matched_url not in self.state.known_urls:
|
||||
self.state.known_urls.append(matched_url)
|
||||
# Enrich relationships once we know the hash
|
||||
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||
self._enrich_relationships_from_api(self.state.hash)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# state modification helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def _apply_hydrus_result(self, result: Dict[str, Any]) -> None:
|
||||
metadata_payload = result.get("metadata")
|
||||
if isinstance(metadata_payload, dict):
|
||||
# Process mime into type for Lua
|
||||
mime = metadata_payload.get("mime")
|
||||
if isinstance(mime, str):
|
||||
if mime.startswith("video/"):
|
||||
metadata_payload["type"] = "video"
|
||||
elif mime.startswith("audio/"):
|
||||
metadata_payload["type"] = "audio"
|
||||
elif mime.startswith("image/"):
|
||||
metadata_payload["type"] = "image"
|
||||
else:
|
||||
metadata_payload["type"] = "other"
|
||||
self.state.metadata = metadata_payload
|
||||
# Do NOT overwrite MPVfile.type with metadata.type
|
||||
self._merge_known_urls(metadata_payload.get("known_urls") or metadata_payload.get("known_urls_set"))
|
||||
source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url")
|
||||
if source_url and not self.state.source_url:
|
||||
self.state.source_url = self._normalise_url(source_url)
|
||||
# If file_relationships are embedded in metadata, capture as relationships when missing
|
||||
if self.state.relationships is None:
|
||||
embedded = metadata_payload.get("file_relationships")
|
||||
if isinstance(embedded, dict) and embedded:
|
||||
self.state.relationships = embedded
|
||||
tags = result.get("tags") or []
|
||||
self._merge_tags(tags)
|
||||
hash_value = result.get("hash") or result.get("matched_hash")
|
||||
if isinstance(hash_value, str) and _looks_like_hash(hash_value):
|
||||
self.state.hash = hash_value.lower()
|
||||
self.state.tag_service_key = result.get("tag_service_key") or self.state.tag_service_key
|
||||
self.state.duration = result.get("duration") or self.state.duration
|
||||
self.state.filesize_mb = result.get("filesize_mb") or self.state.filesize_mb
|
||||
self.state.is_video = bool(result.get("is_video") or self.state.is_video)
|
||||
self.state.is_audio = bool(result.get("is_audio") or self.state.is_audio)
|
||||
if result.get("is_deleted") is not None:
|
||||
self.state.is_deleted = bool(result.get("is_deleted"))
|
||||
if result.get("is_local") is not None:
|
||||
self.state.is_local = bool(result.get("is_local"))
|
||||
if result.get("has_current_file_service") is not None:
|
||||
self.state.has_current_file_service = bool(result.get("has_current_file_service"))
|
||||
# Consolidate relationships from explicit result or embedded metadata
|
||||
relationships_obj: Optional[Dict[str, Any]] = None
|
||||
if isinstance(result.get("relationships"), dict):
|
||||
relationships_obj = result["relationships"]
|
||||
self.state.relationships = relationships_obj
|
||||
elif isinstance(self.state.relationships, dict):
|
||||
relationships_obj = self.state.relationships
|
||||
|
||||
# Helper to flatten any hashes from the relationships object
|
||||
def _collect_hashes(obj: Any, acc: set[str]) -> None:
|
||||
if obj is None:
|
||||
return
|
||||
if isinstance(obj, dict):
|
||||
for v in obj.values():
|
||||
_collect_hashes(v, acc)
|
||||
elif isinstance(obj, (list, tuple, set)):
|
||||
for v in obj:
|
||||
_collect_hashes(v, acc)
|
||||
elif isinstance(obj, str) and _looks_like_hash(obj):
|
||||
acc.add(obj.lower())
|
||||
|
||||
# Derive king and alts robustly from available data
|
||||
king: Optional[str] = None
|
||||
alts: list[str] = []
|
||||
|
||||
# 1) Try direct king fields on relationships object
|
||||
rels = relationships_obj or {}
|
||||
if isinstance(rels, dict):
|
||||
# Common variants
|
||||
for key in ("king", "king_hash", "duplicate_king", "best", "best_hash"):
|
||||
val = rels.get(key)
|
||||
if isinstance(val, str) and _looks_like_hash(val):
|
||||
king = val.lower()
|
||||
break
|
||||
if isinstance(val, list):
|
||||
for h in val:
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
king = h.lower()
|
||||
break
|
||||
if king:
|
||||
break
|
||||
# 2) Extract alternates from known fields: numeric "3" (clips), or textual synonyms
|
||||
for alt_key in ("3", "alternates", "alts", "clips"):
|
||||
val = rels.get(alt_key)
|
||||
if isinstance(val, list):
|
||||
for h in val:
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
h_low = h.lower()
|
||||
if not king or h_low != king:
|
||||
alts.append(h_low)
|
||||
# some APIs might nest
|
||||
elif isinstance(val, dict):
|
||||
tmp: set[str] = set()
|
||||
_collect_hashes(val, tmp)
|
||||
for h in sorted(tmp):
|
||||
if not king or h != king:
|
||||
alts.append(h)
|
||||
|
||||
# 3) Use relationship_metadata keys as additional alternates and king hint
|
||||
rel_meta = result.get("relationship_metadata")
|
||||
if isinstance(rel_meta, dict):
|
||||
# prefer king candidate with no clip_time if not set
|
||||
if not king:
|
||||
for h, meta in rel_meta.items():
|
||||
if isinstance(h, str) and _looks_like_hash(h) and isinstance(meta, dict):
|
||||
if not meta.get("clip_time"):
|
||||
king = h.lower()
|
||||
break
|
||||
for h in rel_meta.keys():
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
h_low = h.lower()
|
||||
if not king or h_low != king:
|
||||
alts.append(h_low)
|
||||
|
||||
# 4) As a last resort, flatten all relationship hashes
|
||||
if not alts and relationships_obj:
|
||||
tmp: set[str] = set()
|
||||
_collect_hashes(relationships_obj, tmp)
|
||||
for h in sorted(tmp):
|
||||
if not king or h != king:
|
||||
alts.append(h)
|
||||
|
||||
# 5) Include current file when appropriate
|
||||
if self.state.hash and (not king or self.state.hash != king) and self.state.hash not in alts:
|
||||
alts.append(self.state.hash)
|
||||
|
||||
# 6) Sort alternates by clip start time when available
|
||||
rel_meta_all = result.get("relationship_metadata") if isinstance(result.get("relationship_metadata"), dict) else {}
|
||||
def _clip_start_for(h: str) -> float:
|
||||
meta = rel_meta_all.get(h) if isinstance(rel_meta_all, dict) else None
|
||||
clip = meta.get("clip_time") if isinstance(meta, dict) else None
|
||||
if isinstance(clip, str):
|
||||
m = re.match(r"^(\d+)-(\d+)$", clip)
|
||||
if m:
|
||||
try:
|
||||
return float(m.group(1))
|
||||
except Exception:
|
||||
return float("inf")
|
||||
return float("inf")
|
||||
|
||||
if alts:
|
||||
# de-duplicate while preserving earliest clip time ordering
|
||||
seen: set[str] = set()
|
||||
alts = [h for h in sorted(alts, key=_clip_start_for) if (h not in seen and not seen.add(h))]
|
||||
|
||||
self.state.king = king
|
||||
self.state.alts = alts
|
||||
if isinstance(result.get("relationship_metadata"), dict):
|
||||
self.state.relationship_metadata = result["relationship_metadata"]
|
||||
self.state.title = result.get("title") or self.state.title
|
||||
self.state.clip_time = result.get("clip_time") or self.state.clip_time
|
||||
if result.get("swap_recommended"):
|
||||
self.state.swap_recommended = True
|
||||
warnings = result.get("warnings") or []
|
||||
if warnings:
|
||||
self.state.warnings.extend(warnings)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# relationships enrichment (Hydrus endpoint + alt metadata)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _enrich_relationships_from_api(self, file_hash: str) -> None:
|
||||
"""Fetch relationships for the given hash and enrich state's king/alts and alt metadata.
|
||||
|
||||
- Uses GET /manage_file_relationships/get_file_relationships?hash=...
|
||||
- If alts exist, batch-fetch their metadata via GET /get_files/file_metadata?hashes=[...]
|
||||
- Extracts title, duration, size, tags (cleaned: title: kept with namespace, others stripped)
|
||||
"""
|
||||
base_url = self.hydrus_settings.base_url or ""
|
||||
access_key = self.hydrus_settings.access_key or ""
|
||||
if not base_url:
|
||||
return
|
||||
try:
|
||||
client = HydrusClient(base_url, access_key, timeout=self.hydrus_settings.timeout)
|
||||
except Exception as exc: # pragma: no cover - construction should rarely fail
|
||||
self.state.warnings.append(f"Hydrus client init failed: {exc}")
|
||||
return
|
||||
try:
|
||||
rel_resp = client.get_file_relationships(file_hash)
|
||||
except HydrusRequestError as hre: # pragma: no cover - surfaced but non-fatal
|
||||
self.state.warnings.append(f"relationships api: {hre}")
|
||||
return
|
||||
except Exception as exc: # pragma: no cover
|
||||
self.state.warnings.append(f"relationships api: {exc}")
|
||||
return
|
||||
|
||||
rel_map = rel_resp.get("file_relationships") or {}
|
||||
rel_obj = None
|
||||
if isinstance(rel_map, dict):
|
||||
rel_obj = rel_map.get(file_hash) or next((v for v in rel_map.values() if isinstance(v, dict)), None)
|
||||
if isinstance(rel_obj, dict):
|
||||
# Preserve the full relationships object
|
||||
self.state.relationships = rel_obj
|
||||
# Update king and alts from canonical fields
|
||||
king = rel_obj.get("king")
|
||||
alts = rel_obj.get("3") or []
|
||||
if isinstance(king, str) and _looks_like_hash(king):
|
||||
self.state.king = king.lower()
|
||||
if isinstance(alts, list):
|
||||
self.state.alts = [h.lower() for h in alts if isinstance(h, str) and _looks_like_hash(h)]
|
||||
|
||||
# Fetch alt metadata if we have alts
|
||||
if not self.state.alts:
|
||||
return
|
||||
try:
|
||||
meta_resp = client.fetch_file_metadata(
|
||||
hashes=self.state.alts,
|
||||
include_service_keys_to_tags=True,
|
||||
include_duration=True,
|
||||
include_size=True,
|
||||
include_file_urls=False,
|
||||
include_mime=False,
|
||||
)
|
||||
except HydrusRequestError as hre: # pragma: no cover
|
||||
self.state.warnings.append(f"metadata api: {hre}")
|
||||
return
|
||||
except Exception as exc: # pragma: no cover
|
||||
self.state.warnings.append(f"metadata api: {exc}")
|
||||
return
|
||||
|
||||
if not isinstance(meta_resp, dict):
|
||||
return
|
||||
entries = meta_resp.get("metadata") or []
|
||||
if not isinstance(entries, list):
|
||||
return
|
||||
|
||||
def _extract_tags(meta: Dict[str, Any]) -> list[str]:
|
||||
tags: list[str] = []
|
||||
tag_root = meta.get("tags") or meta.get("service_keys_to_statuses_to_tags") or {}
|
||||
if isinstance(tag_root, dict):
|
||||
for service_dict in tag_root.values():
|
||||
if not isinstance(service_dict, dict):
|
||||
continue
|
||||
# Prefer storage_tags but fall back to any list values under known keys
|
||||
storage = service_dict.get("storage_tags")
|
||||
if isinstance(storage, dict):
|
||||
for vals in storage.values():
|
||||
if isinstance(vals, list):
|
||||
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||
else:
|
||||
# fall back: inspect lists directly under service_dict
|
||||
for vals in service_dict.values():
|
||||
if isinstance(vals, list):
|
||||
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||
return tags
|
||||
|
||||
def _clean_tags_and_title(all_tags: list[str]) -> tuple[Optional[str], list[str]]:
|
||||
title_val: Optional[str] = None
|
||||
cleaned: list[str] = []
|
||||
for tag in all_tags:
|
||||
if not isinstance(tag, str):
|
||||
continue
|
||||
if tag.startswith("title:"):
|
||||
if title_val is None:
|
||||
title_val = tag.split(":", 1)[1]
|
||||
cleaned.append(tag) # keep namespaced title
|
||||
else:
|
||||
if ":" in tag:
|
||||
cleaned.append(tag.split(":", 1)[1])
|
||||
else:
|
||||
cleaned.append(tag)
|
||||
return title_val, cleaned
|
||||
|
||||
for meta in entries:
|
||||
if not isinstance(meta, dict):
|
||||
continue
|
||||
h = meta.get("hash")
|
||||
if not (isinstance(h, str) and _looks_like_hash(h)):
|
||||
continue
|
||||
tags_all = _extract_tags(meta)
|
||||
title_val, tags_clean = _clean_tags_and_title(tags_all)
|
||||
alt_info = {
|
||||
"title": title_val,
|
||||
"duration": meta.get("duration"),
|
||||
"size": meta.get("size"),
|
||||
"tags": tags_clean,
|
||||
}
|
||||
self.state.relationship_metadata[h.lower()] = alt_info
|
||||
|
||||
def _merge_tags(self, tags: Sequence[Any]) -> None:
|
||||
incoming = _normalise_string_list(tags)
|
||||
if not incoming:
|
||||
return
|
||||
combined = list(self.state.tags or []) + incoming
|
||||
self.state.tags = unique_preserve_order(combined)
|
||||
for tag in incoming:
|
||||
if tag not in self.state.original_tags:
|
||||
self.state.original_tags[tag] = tag
|
||||
|
||||
def _merge_known_urls(self, urls: Optional[Iterable[Any]]) -> None:
|
||||
if not urls:
|
||||
return
|
||||
combined = list(self.state.known_urls or []) + _normalise_string_list(urls)
|
||||
self.state.known_urls = unique_preserve_order(combined)
|
||||
|
||||
def _load_sidecar_tags(self, local_path: str) -> None:
|
||||
try:
|
||||
media_path = Path(local_path)
|
||||
except Exception:
|
||||
return
|
||||
if not media_path.exists():
|
||||
return
|
||||
candidates = [media_path.with_suffix(".tags"), media_path.with_suffix(".tags.txt")]
|
||||
for candidate in candidates:
|
||||
if candidate.exists():
|
||||
hash_value, tags, known = self._read_sidecar(candidate)
|
||||
if hash_value and not self.state.hash and _looks_like_hash(hash_value):
|
||||
self.state.hash = hash_value.lower()
|
||||
self._merge_tags(tags)
|
||||
self._merge_known_urls(known)
|
||||
break
|
||||
|
||||
def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
|
||||
try:
|
||||
raw = sidecar_path.read_text(encoding="utf-8", errors="ignore")
|
||||
except OSError:
|
||||
return None, [], []
|
||||
hash_value: Optional[str] = None
|
||||
tags: List[str] = []
|
||||
known_urls: List[str] = []
|
||||
for line in raw.splitlines():
|
||||
trimmed = line.strip()
|
||||
if not trimmed:
|
||||
continue
|
||||
lowered = trimmed.lower()
|
||||
if lowered.startswith("hash:"):
|
||||
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||
if candidate:
|
||||
hash_value = candidate
|
||||
elif lowered.startswith("known_url:") or lowered.startswith("url:"):
|
||||
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||
if candidate:
|
||||
known_urls.append(candidate)
|
||||
else:
|
||||
tags.append(trimmed)
|
||||
return hash_value, tags, known_urls
|
||||
|
||||
def _compute_local_hash(self, local_path: str) -> None:
|
||||
try:
|
||||
digest = sha256_file(Path(local_path))
|
||||
except OSError as exc:
|
||||
self.state.warnings.append(f"sha256 failed: {exc}")
|
||||
return
|
||||
self.state.hash = digest.lower()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# finalisation helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _finalise(self) -> None:
|
||||
if self.state.tags:
|
||||
self.state.tags = unique_preserve_order(self.state.tags)
|
||||
if self.state.known_urls:
|
||||
self.state.known_urls = unique_preserve_order(self.state.known_urls)
|
||||
# Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type
|
||||
if not self.state.title:
|
||||
if self.state.metadata.get("title"):
|
||||
self.state.title = str(self.state.metadata["title"]).strip()
|
||||
elif self.state.filename:
|
||||
self.state.title = self.state.filename
|
||||
if self.state.hash and not _looks_like_hash(self.state.hash):
|
||||
self.state.hash = None
|
||||
if self.state.relationship_metadata is None:
|
||||
self.state.relationship_metadata = {}
|
||||
if self.state.relationships is not None and not isinstance(self.state.relationships, dict):
|
||||
self.state.relationships = None
|
||||
if self.state.original_tags is None:
|
||||
self.state.original_tags = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# util helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _normalise_url(value: Any) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
return None
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def _resolve_local_path(path: Optional[str]) -> Optional[str]:
|
||||
if not path:
|
||||
return None
|
||||
parsed = urlparse(path)
|
||||
if parsed.scheme == "file":
|
||||
decoded = MPVFileBuilder._decode_file_url(path)
|
||||
return decoded
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def _decode_file_url(value: str) -> Optional[str]:
|
||||
parsed = urlparse(value)
|
||||
if parsed.scheme != "file":
|
||||
return None
|
||||
netloc = parsed.netloc or ""
|
||||
path = unquote(parsed.path or "")
|
||||
if netloc:
|
||||
path = f"//{netloc}{path}"
|
||||
if os.name == "nt" and path.startswith("/") and re.match(r"/[A-Za-z]:", path):
|
||||
path = path[1:]
|
||||
path = path.replace("/", os.sep)
|
||||
return path
|
||||
|
||||
def _looks_like_hydrus_url(self, url: str) -> bool:
|
||||
if not url:
|
||||
return False
|
||||
if url.startswith("hydrus://"):
|
||||
return True
|
||||
if "Hydrus-Client-API-Access-Key=" in url:
|
||||
return True
|
||||
base = self.hydrus_settings.base_url
|
||||
if base and url.startswith(base) and "/get_files/" in url:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _extract_hash_from_path(path: Optional[str]) -> Optional[str]:
|
||||
if not path:
|
||||
return None
|
||||
parsed = urlparse(path)
|
||||
query = parse_qs(parsed.query)
|
||||
if "hash" in query and query["hash"]:
|
||||
candidate = query["hash"][0].strip()
|
||||
if candidate:
|
||||
return candidate.lower()
|
||||
match = re.search(r"hash=([0-9a-fA-F]{64})", path)
|
||||
if match:
|
||||
return match.group(1).lower()
|
||||
return None
|
||||
|
||||
|
||||
def build_mpv_file_state(payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
builder = MPVFileBuilder(payload or {}, config or {})
|
||||
return builder.build()
|
||||
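A minimal sketch of driving the builder through build_mpv_file_state. The payload keys follow _initialise_identity and the config keys follow _resolve_hydrus_settings, but the concrete values below are placeholders; without a configured Hydrus URL the Hydrus enrichment steps are simply skipped.

from helper.mpv_file import build_mpv_file_state

payload = {
    "path": "C:/media/clip.mp4",
    "mpv_metadata": {"media-title": "clip"},
    "tags": ["creator:someone"],
    "include_relationships": True,
}
config = {
    "HydrusNetwork_Access_Key": "0123abcd",  # placeholder, not a real key
    "HydrusNetwork_Request_Timeout": 60,
}

state = build_mpv_file_state(payload, config)
# Empty optional fields are dropped by MPVfile.to_dict(), so use .get() for anything optional.
print(state["type"], state.get("hash"), state.get("tags"))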
143
helper/progress.py
Normal file
143
helper/progress.py
Normal file
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""Text-based progress bar utilities for consistent display across all downloads."""

import sys

from helper.logger import log, debug


def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
    """Create a text-based progress bar.

    Args:
        current: Current progress (bytes/items)
        total: Total to complete (bytes/items)
        width: Width of the bar in characters (default 40)
        label: Optional label prefix

    Returns:
        Formatted progress bar string

    Examples:
        format_progress_bar(50, 100)
        # Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"

        format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
        # Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
    """
    if total <= 0:
        percentage = 0
        filled = 0
    else:
        percentage = (current / total) * 100
        filled = int((current / total) * width)

    # Create bar: filled blocks + empty blocks
    bar = "█" * filled + "░" * (width - filled)

    # Format percentage
    pct_str = f"{percentage:.1f}%"

    # Build result
    if label:
        result = f"{label}: [{bar}] {pct_str}"
    else:
        result = f"[{bar}] {pct_str}"

    return result


def format_size(bytes_val: float) -> str:
    """Format bytes to human-readable size.

    Examples:
        format_size(1024) -> "1.00 KB"
        format_size(1024*1024) -> "1.00 MB"
        format_size(1024*1024*1024) -> "1.00 GB"
    """
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if bytes_val < 1024:
            return f"{bytes_val:.2f} {unit}"
        bytes_val /= 1024
    return f"{bytes_val:.2f} PB"


def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
    """Format download status with progress bar and details.

    Args:
        filename: Name of file being downloaded
        current: Current bytes downloaded
        total: Total file size
        speed: Download speed in bytes/sec

    Returns:
        Formatted status line

    Examples:
        format_download_status("movie.mkv", 512*1024*1024, 2*1024*1024*1024, 10*1024*1024)
        # Returns: "movie.mkv: [████████████░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0% (512.00 MB / 2.00 GB @ 10.00 MB/s)"
    """
    # Pass the filename through as the bar label so the output matches the documented example.
    bar = format_progress_bar(current, total, width=30, label=filename)
    size_current = format_size(current)
    size_total = format_size(total)

    if speed > 0:
        speed_str = f" @ {format_size(speed)}/s"
    else:
        speed_str = ""

    return f"{bar} ({size_current} / {size_total}{speed_str})"


def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
    """Print download progress to stderr (doesn't interfere with piped output).

    Args:
        filename: File being downloaded
        current: Current bytes
        total: Total bytes
        speed: Speed in bytes/sec
        end: Line ending (default "\r" for overwriting, use "\n" for final)
    """
    status = format_download_status(filename, current, total, speed)
    debug(status, end=end, flush=True)


def print_final_progress(filename: str, total: int, elapsed: float) -> None:
    """Print final progress line (100%) with time elapsed.

    Args:
        filename: File that was downloaded
        total: Total size
        elapsed: Time elapsed in seconds
    """
    bar = format_progress_bar(total, total, width=30)
    size_str = format_size(total)

    # Format elapsed time
    if elapsed < 60:
        time_str = f"{elapsed:.1f}s"
    elif elapsed < 3600:
        minutes = elapsed / 60
        time_str = f"{minutes:.1f}m"
    else:
        hours = elapsed / 3600
        time_str = f"{hours:.2f}h"

    debug(f"{bar} ({size_str}) - {time_str}")


if __name__ == "__main__":
    # Demo
    import time

    log("Progress Bar Demo:", file=sys.stderr)

    # Demo 1: Simple progress
    for i in range(101):
        print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
        time.sleep(0.02)

    print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
    log()
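One way to wire these helpers into a copy loop, computing the speed from elapsed wall time; note that print_progress routes through debug(), so nothing is shown unless debug logging is enabled. This is only a sketch: the chunk size and paths are arbitrary.

import time
from pathlib import Path

from helper.logger import set_debug
from helper.progress import print_progress, print_final_progress

def copy_with_progress(src: Path, dst: Path) -> None:
    set_debug(True)  # progress lines go through debug(), which is off by default
    total = src.stat().st_size
    start = time.monotonic()
    done = 0
    with open(src, "rb") as fin, open(dst, "wb") as fout:
        while chunk := fin.read(1024 * 1024):
            fout.write(chunk)
            done += len(chunk)
            elapsed = max(time.monotonic() - start, 1e-6)
            print_progress(dst.name, done, total, speed=done / elapsed)
    print_final_progress(dst.name, total, time.monotonic() - start)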
159
helper/query_parser.py
Normal file
159
helper/query_parser.py
Normal file
@@ -0,0 +1,159 @@
"""Dynamic query parser for filtering and field extraction.

Supports query syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma"
- year:2010
- isbn:0557677203 author:"Albert Pike"
- Mixed with free text: "Morals" isbn:0557677203

This allows flexible query strings that can be parsed by any search provider
to extract specific fields for filtering and searching.
"""

from typing import Dict, List, Tuple, Optional, Any
import re


def parse_query(query: str) -> Dict[str, Any]:
    """Parse a query string into field:value pairs and free text.

    Args:
        query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'

    Returns:
        Dictionary with:
        - 'fields': Dict[field_name, field_value] for structured fields
        - 'text': str with remaining free text
        - 'raw': str original query
    """
    result = {
        'fields': {},
        'text': '',
        'raw': query,
    }

    if not query or not query.strip():
        return result

    query = query.strip()
    remaining_parts = []

    # Pattern to match: field:value or field:"quoted value"
    # Matches: word: followed by either quoted string or unquoted word
    pattern = r'(\w+):(?:"([^"]*)"|(\S+))'

    pos = 0
    for match in re.finditer(pattern, query):
        # Add any text before this match
        if match.start() > pos:
            before_text = query[pos:match.start()].strip()
            if before_text:
                remaining_parts.append(before_text)

        field_name = match.group(1).lower()
        field_value = match.group(2) if match.group(2) is not None else match.group(3)

        result['fields'][field_name] = field_value
        pos = match.end()

    # Add any remaining text after last match
    if pos < len(query):
        remaining_text = query[pos:].strip()
        if remaining_text:
            remaining_parts.append(remaining_text)

    result['text'] = ' '.join(remaining_parts)

    return result


def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
    """Get a field value from parsed query, with optional default.

    Args:
        parsed_query: Result from parse_query()
        field_name: Field name to look up (case-insensitive)
        default: Default value if field not found

    Returns:
        Field value or default
    """
    return parsed_query.get('fields', {}).get(field_name.lower(), default)


def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
    """Check if a field exists in parsed query.

    Args:
        parsed_query: Result from parse_query()
        field_name: Field name to check (case-insensitive)

    Returns:
        True if field exists
    """
    return field_name.lower() in parsed_query.get('fields', {})


def get_free_text(parsed_query: Dict[str, Any]) -> str:
    """Get the free text portion of a parsed query.

    Args:
        parsed_query: Result from parse_query()

    Returns:
        Free text or empty string
    """
    return parsed_query.get('text', '')


def build_query_for_provider(
    parsed_query: Dict[str, Any],
    provider: str,
    extraction_map: Optional[Dict[str, str]] = None
) -> Tuple[str, Dict[str, str]]:
    """Build a search query and filters dict for a specific provider.

    Different providers have different search syntax. This function
    extracts the appropriate fields for each provider.

    Args:
        parsed_query: Result from parse_query()
        provider: Provider name ('libgen', 'openlibrary', 'soulseek')
        extraction_map: Optional mapping of field names to provider-specific names
            e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}

    Returns:
        Tuple of (search_query: str, extracted_fields: Dict[field, value])
    """
    extraction_map = extraction_map or {}
    extracted = {}
    free_text = get_free_text(parsed_query)

    # Extract fields based on map
    for field_name, provider_key in extraction_map.items():
        if has_field(parsed_query, field_name):
            extracted[provider_key] = get_field(parsed_query, field_name)

    # If provider-specific extraction is needed, providers can implement it.
    # For now, return the free text as the query.
    return free_text, extracted


if __name__ == '__main__':
    # Test cases
    test_queries = [
        'isbn:0557677203',
        'isbn:0557677203 author:"Albert Pike"',
        'Morals and Dogma isbn:0557677203',
        'title:"Morals and Dogma" author:"Albert Pike" year:2010',
        'search term without fields',
        'author:"John Smith" title:"A Book"',
    ]

    for query in test_queries:
        print(f"\nQuery: {query}")
        parsed = parse_query(query)
        print(f"  Fields: {parsed['fields']}")
        print(f"  Text: {parsed['text']}")
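The __main__ block above only exercises parse_query; here is a sketch of how a provider might use build_query_for_provider with its own extraction map (the mapping shown is illustrative, not a fixed API contract).

from helper.query_parser import parse_query, build_query_for_provider

parsed = parse_query('title:"Morals and Dogma" author:"Albert Pike" first edition')
query, fields = build_query_for_provider(
    parsed,
    provider="libgen",
    extraction_map={"title": "title", "author": "author", "isbn": "isbn"},
)
# query  == 'first edition'
# fields == {'title': 'Morals and Dogma', 'author': 'Albert Pike'}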
1777
helper/search_provider.py
Normal file
1777
helper/search_provider.py
Normal file
File diff suppressed because it is too large
155
helper/tasks.py
Normal file
155
helper/tasks.py
Normal file
@@ -0,0 +1,155 @@
|
||||
"""Background task handling and IPC helpers for mpv integration."""
|
||||
from __future__ import annotations
|
||||
import errno
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
import threading
|
||||
import time
|
||||
from typing import IO, Iterable
|
||||
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
|
||||
"""Connect to the mpv IPC server located at *path*."""
|
||||
deadline = time.time() + timeout
|
||||
if not path:
|
||||
return None
|
||||
if os.name == 'nt':
|
||||
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
|
||||
while True:
|
||||
try:
|
||||
return open(path, 'r+b', buffering=0)
|
||||
except FileNotFoundError:
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
except OSError as exc: # Pipe busy
|
||||
if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
|
||||
raise
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
else:
|
||||
sock = socket.socket(socket.AF_UNIX)
|
||||
while True:
|
||||
try:
|
||||
sock.connect(path)
|
||||
return sock.makefile('r+b', buffering=0)
|
||||
except FileNotFoundError:
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
except OSError as exc:
|
||||
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
|
||||
raise
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
def ipc_sender(ipc: IO[bytes] | None):
|
||||
"""Create a helper function for sending script messages via IPC."""
|
||||
if ipc is None:
|
||||
def _noop(_event: str, _payload: dict) -> None:
|
||||
return None
|
||||
return _noop
|
||||
lock = threading.Lock()
|
||||
def _send(event: str, payload: dict) -> None:
|
||||
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
|
||||
encoded = message.encode('utf-8') + b'\n'
|
||||
with lock:
|
||||
try:
|
||||
ipc.write(encoded)
|
||||
ipc.flush()
|
||||
except OSError:
|
||||
pass
|
||||
return _send
|
||||
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
|
||||
for raw in stream:
|
||||
yield raw.rstrip('\r\n')
|
||||
def _run_task(args, parser) -> int:
|
||||
if not args.command:
|
||||
parser.error('run-task requires a command to execute (use "--" before the command).')
|
||||
env = os.environ.copy()
|
||||
for entry in args.env:
|
||||
key, sep, value = entry.partition('=')
|
||||
if not sep:
|
||||
parser.error(f'Invalid environment variable definition: {entry!r}')
|
||||
env[key] = value
|
||||
command = list(args.command)
|
||||
if command and command[0] == '--':
|
||||
command.pop(0)
|
||||
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
|
||||
if not command:
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'error',
|
||||
'message': 'No command provided after separator',
|
||||
})
|
||||
log('[downlow.py] No command provided for run-task', file=sys.stderr)
|
||||
return 1
|
||||
if command and isinstance(command[0], str) and sys.executable:
|
||||
first = command[0].lower()
|
||||
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
|
||||
command[0] = sys.executable
|
||||
if os.environ.get('DOWNLOW_DEBUG'):
|
||||
log(f"Launching command: {command}", file=sys.stderr)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'start',
|
||||
'command': command,
|
||||
'cwd': args.cwd or os.getcwd(),
|
||||
})
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
cwd=args.cwd or None,
|
||||
env=env,
|
||||
text=True,
|
||||
bufsize=1,
|
||||
universal_newlines=True,
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'error',
|
||||
'message': f'Executable not found: {exc.filename}',
|
||||
})
|
||||
log(f"{exc}", file=sys.stderr)
|
||||
return 1
|
||||
stdout_lines: list[str] = []
|
||||
stderr_lines: list[str] = []
|
||||
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
|
||||
for line in iter_stream(stream):
|
||||
sink.append(line)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': label,
|
||||
'line': line,
|
||||
})
|
||||
threads = []
|
||||
if process.stdout:
|
||||
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
|
||||
t_out.start()
|
||||
threads.append(t_out)
|
||||
if process.stderr:
|
||||
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
|
||||
t_err.start()
|
||||
threads.append(t_err)
|
||||
return_code = process.wait()
|
||||
for t in threads:
|
||||
t.join(timeout=0.1)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'exit',
|
||||
'returncode': return_code,
|
||||
'success': return_code == 0,
|
||||
})
|
||||
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
|
||||
if stdout_lines:
|
||||
log('\n'.join(stdout_lines))
|
||||
if stderr_lines:
|
||||
log('\n'.join(stderr_lines), file=sys.stderr)
|
||||
return return_code
|
||||
706
helper/unified_book_downloader.py
Normal file
@@ -0,0 +1,706 @@
|
||||
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
|
||||
|
||||
This module provides a single interface for downloading books from multiple sources:
|
||||
1. Try Archive.org direct download (if available)
|
||||
2. Try Archive.org borrowing (if user has credentials)
|
||||
3. Fallback to Libgen search by ISBN
|
||||
4. Attempt Libgen download
|
||||
|
||||
All sources integrated with proper metadata scraping and error handling.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
import requests
|
||||
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
|
||||
from pathlib import Path
|
||||
|
||||
from helper.logger import debug
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnifiedBookDownloader:
|
||||
"""Unified interface for downloading books from multiple sources."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
|
||||
"""Initialize the unified book downloader.
|
||||
|
||||
Args:
|
||||
config: Configuration dict with credentials
|
||||
output_dir: Default output directory
|
||||
"""
|
||||
self.config = config or {}
|
||||
self.output_dir = output_dir
|
||||
self.session = requests.Session()
|
||||
|
||||
# Import download functions from their modules
|
||||
self._init_downloaders()
|
||||
|
||||
def _init_downloaders(self) -> None:
|
||||
"""Initialize downloader functions from their modules."""
|
||||
try:
|
||||
from helper.archive_client import (
|
||||
check_direct_download,
|
||||
get_openlibrary_by_isbn,
|
||||
loan
|
||||
)
|
||||
self.check_direct_download = check_direct_download
|
||||
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
|
||||
self.loan_func = loan
|
||||
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
|
||||
self.check_direct_download = None
|
||||
self.get_openlibrary_by_isbn = None
|
||||
self.loan_func = None
|
||||
|
||||
try:
|
||||
from helper.libgen_service import (
|
||||
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
|
||||
download_from_mirror as _libgen_download,
|
||||
search_libgen as _libgen_search,
|
||||
)
|
||||
|
||||
def _log_info(message: str) -> None:
|
||||
debug(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
def _log_error(message: str) -> None:
|
||||
logger.error(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
|
||||
query,
|
||||
limit=limit,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
|
||||
mirror_url,
|
||||
output_path,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
|
||||
self.search_libgen = None
|
||||
self.download_from_mirror = None
|
||||
|
||||
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get all available download options for a book.
|
||||
|
||||
Checks in priority order:
|
||||
1. Archive.org direct download (public domain)
|
||||
2. Archive.org borrowing (if credentials available and book is borrowable)
|
||||
3. Libgen fallback (by ISBN)
|
||||
|
||||
Args:
|
||||
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
|
||||
|
||||
Returns:
|
||||
Dict with available download methods and metadata
|
||||
"""
|
||||
options = {
|
||||
'book_title': book_data.get('title', 'Unknown'),
|
||||
'book_author': book_data.get('author', 'Unknown'),
|
||||
'isbn': book_data.get('isbn', ''),
|
||||
'openlibrary_id': book_data.get('openlibrary_id', ''),
|
||||
'methods': [], # Will be sorted by priority
|
||||
'metadata': {}
|
||||
}
|
||||
|
||||
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
|
||||
ol_id = book_data.get('openlibrary_id', '')
|
||||
book_id = None
|
||||
|
||||
if ol_id.startswith('OL') and len(ol_id) > 2:
|
||||
# Remove 'OL' prefix (keep everything after it including the suffix letter)
|
||||
# The book_id is all digits after 'OL'
|
||||
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
|
||||
|
||||
# PRIORITY 1: Check direct download (fastest, no auth needed)
|
||||
if self.check_direct_download:
|
||||
try:
|
||||
can_download, pdf_url = self.check_direct_download(book_id)
|
||||
if can_download:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_direct',
|
||||
'label': 'Archive.org Direct Download',
|
||||
'requires_auth': False,
|
||||
'pdf_url': pdf_url,
|
||||
'book_id': book_id,
|
||||
'priority': 1 # Highest priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
|
||||
|
||||
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
|
||||
# First verify the book is actually lendable via OpenLibrary API
|
||||
if self._has_archive_credentials():
|
||||
is_lendable, status = self._check_book_lendable_status(ol_id)
|
||||
|
||||
if is_lendable:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_borrow',
|
||||
'label': 'Archive.org Borrow',
|
||||
'requires_auth': True,
|
||||
'book_id': book_id,
|
||||
'priority': 2 # Second priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
|
||||
else:
|
||||
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
|
||||
|
||||
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
|
||||
isbn = book_data.get('isbn', '')
|
||||
title = book_data.get('title', '')
|
||||
author = book_data.get('author', '')
|
||||
|
||||
if self.search_libgen:
|
||||
# Can use Libgen if we have ISBN OR title (or both)
|
||||
if isbn or title:
|
||||
options['methods'].append({
|
||||
'type': 'libgen',
|
||||
'label': 'Libgen Search & Download',
|
||||
'requires_auth': False,
|
||||
'isbn': isbn,
|
||||
'title': title,
|
||||
'author': author,
|
||||
'priority': 3 # Third priority (fallback)
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
|
||||
|
||||
# Sort by priority (higher priority first)
|
||||
options['methods'].sort(key=lambda x: x.get('priority', 999))
|
||||
|
||||
return options
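A sketch of how the returned options are typically inspected; the book metadata below is invented for illustration:

downloader = UnifiedBookDownloader(config={}, output_dir='/tmp/books')
options = downloader.get_download_options({
    'title': 'Example Title',
    'author': 'Example Author',
    'isbn': '9780000000000',
    'openlibrary_id': 'OL8513721M',
})
for m in options['methods']:  # already sorted by ascending priority value
    print(m['priority'], m['type'], m['label'])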
|
||||
|
||||
def _has_archive_credentials(self) -> bool:
|
||||
"""Check if Archive.org credentials are available."""
|
||||
try:
|
||||
from helper.archive_client import credential_openlibrary
|
||||
email, password = credential_openlibrary(self.config)
|
||||
return bool(email and password)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
|
||||
"""Check if a book is lendable via OpenLibrary API.
|
||||
|
||||
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
|
||||
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
|
||||
|
||||
Args:
|
||||
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
|
||||
|
||||
Returns:
|
||||
Tuple of (is_lendable: bool, status_reason: Optional[str])
|
||||
"""
|
||||
try:
|
||||
if not ol_id.startswith('OL'):
|
||||
return False, "Invalid OpenLibrary ID format"
|
||||
|
||||
# If this is a Work ID (ends with W), we can't query Volumes API
|
||||
# Work IDs are abstract umbrella records, not specific editions
|
||||
if ol_id.endswith('W'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
|
||||
return False, "Work ID not supported by Volumes API (not a specific edition)"
|
||||
|
||||
# If it ends with M, it's an Edition ID - proceed with query
|
||||
if not ol_id.endswith('M'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
|
||||
return False, "Invalid OpenLibrary ID type"
|
||||
|
||||
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
|
||||
response = self.session.get(url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# Empty response means no records found
|
||||
if not data:
|
||||
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
|
||||
return False, "No availability data found"
|
||||
|
||||
# The response is wrapped in OLID key
|
||||
olid_key = f"OLID:{ol_id}"
|
||||
if olid_key not in data:
|
||||
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
|
||||
return False, "No availability data found"
|
||||
|
||||
olid_data = data[olid_key]
|
||||
|
||||
# Check items array for lendable status
|
||||
if olid_data.get('items'):
|
||||
items = olid_data['items']
|
||||
|
||||
# Check the first item for lending status
|
||||
first_item = items[0]
|
||||
|
||||
# Handle both dict and string representations (PowerShell converts to string)
|
||||
if isinstance(first_item, dict):
|
||||
status = first_item.get('status', '')
|
||||
else:
|
||||
# String representation - check if 'lendable' is in it
|
||||
status = str(first_item).lower()
|
||||
|
||||
is_lendable = 'lendable' in str(status).lower()
|
||||
|
||||
if is_lendable:
|
||||
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
|
||||
return True, "LENDABLE"
|
||||
else:
status_str = status if isinstance(status, str) and status else 'NOT_LENDABLE'
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
return False, status_str
|
||||
else:
|
||||
# No items array or empty
|
||||
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
|
||||
return False, "Not available for lending"
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
|
||||
return False, "API timeout"
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
|
||||
return False, f"API error: {e}"
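For reference, the checks above imply a brief Volumes API response shaped roughly like the following (reconstructed from this method, not quoted from OpenLibrary documentation):

# {
#   "OLID:OL8513721M": {
#       "items": [
#           {"status": "lendable", ...}   # any other status string is treated as not lendable
#       ],
#       ...
#   }
# }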
|
||||
|
||||
|
||||
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
|
||||
"""Download a book using the specified method.
|
||||
|
||||
Args:
|
||||
method: Download method dict from get_download_options()
|
||||
output_dir: Directory to save the book
|
||||
|
||||
Returns:
|
||||
Tuple of (success: bool, message: str)
|
||||
"""
|
||||
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
|
||||
method_type = method.get('type', '')
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
|
||||
|
||||
try:
|
||||
if method_type == 'archive.org_direct':
|
||||
return await self._download_archive_direct(method, output_dir)
|
||||
|
||||
elif method_type == 'archive.org_borrow':
|
||||
return await self._download_archive_borrow(method, output_dir)
|
||||
|
||||
elif method_type == 'libgen':
|
||||
return await self._download_libgen(method, output_dir)
|
||||
|
||||
else:
|
||||
return False, f"Unknown download method: {method_type}"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
|
||||
return False, f"Download failed: {str(e)}"
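A hypothetical end-to-end call combining option discovery with the dispatcher above (paths and metadata are placeholders):

async def _demo() -> None:
    downloader = UnifiedBookDownloader(config={}, output_dir='/tmp/books')
    options = downloader.get_download_options({'openlibrary_id': 'OL8513721M', 'title': 'Example'})
    if options['methods']:
        ok, message = await downloader.download_book(options['methods'][0])
        print(ok, message)
    downloader.close()

# asyncio.run(_demo())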
|
||||
|
||||
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download directly from Archive.org."""
|
||||
try:
|
||||
pdf_url = method.get('pdf_url', '')
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
if not pdf_url:
|
||||
return False, "No PDF URL available"
|
||||
|
||||
# Determine output filename
|
||||
filename = f"{book_id}.pdf"
|
||||
output_path = Path(output_dir) / filename
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
|
||||
|
||||
# Download in a thread to avoid blocking
|
||||
loop = asyncio.get_event_loop()
|
||||
success = await loop.run_in_executor(
|
||||
None,
|
||||
self._download_file,
|
||||
pdf_url,
|
||||
str(output_path)
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
|
||||
return True, f"Downloaded to: {output_path}"
|
||||
else:
|
||||
return False, "Failed to download PDF"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
|
||||
return False, f"Archive download failed: {str(e)}"
|
||||
|
||||
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Archive.org borrowing (requires credentials).
|
||||
|
||||
Process (follows archive_client.py pattern):
|
||||
1. Login to Archive.org with credentials
|
||||
2. Call loan endpoint to borrow the book (14-day loan)
|
||||
3. Get book info (page links, metadata)
|
||||
4. Download all pages as images
|
||||
5. Merge images into PDF
|
||||
|
||||
The loan function from archive_client.py handles:
|
||||
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
|
||||
- Creating borrow token for access
|
||||
- Handling borrow failures
|
||||
|
||||
get_book_infos() extracts page links from the borrowed book viewer
|
||||
download() downloads all pages using thread pool
|
||||
img2pdf merges pages into searchable PDF
|
||||
"""
|
||||
try:
|
||||
from helper.archive_client import credential_openlibrary
|
||||
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
# Get credentials
|
||||
email, password = credential_openlibrary(self.config)
|
||||
if not email or not password:
|
||||
return False, "Archive.org credentials not configured"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
|
||||
|
||||
# Login and borrow (in thread, following download_book.py pattern)
|
||||
loop = asyncio.get_event_loop()
|
||||
borrow_result = await loop.run_in_executor(
|
||||
None,
|
||||
self._archive_borrow_and_download,
|
||||
email,
|
||||
password,
|
||||
book_id,
|
||||
output_dir
|
||||
)
|
||||
|
||||
if borrow_result and isinstance(borrow_result, tuple):
|
||||
success, filepath = borrow_result
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
|
||||
return True, filepath
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
|
||||
return False, filepath
|
||||
else:
|
||||
return False, "Failed to borrow book from Archive.org"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Archive borrow failed: {str(e)}"
|
||||
|
||||
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Libgen search and download with mirror fallback."""
|
||||
try:
|
||||
isbn = method.get('isbn', '')
|
||||
title = method.get('title', '')
|
||||
|
||||
if not isbn and not title:
|
||||
return False, "Need ISBN or title for Libgen search"
|
||||
|
||||
if not self.search_libgen:
|
||||
return False, "Libgen searcher not available"
|
||||
|
||||
# Define wrapper functions to safely call the methods
|
||||
search_func = self.search_libgen
|
||||
if search_func is None:
|
||||
return False, "Search function not available"
|
||||
|
||||
preloaded_results = method.get('results')
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
if preloaded_results:
|
||||
results = list(preloaded_results)
|
||||
if not results:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
else:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
|
||||
if not results:
|
||||
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
|
||||
return False, f"No Libgen results found for: {isbn or title}"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
|
||||
|
||||
# Determine output filename (use first result for naming)
|
||||
first_result = results[0]
|
||||
filename = f"{first_result.get('title', 'book')}"
|
||||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||||
|
||||
# Try each result's mirror until one succeeds
|
||||
for idx, result in enumerate(results, 1):
|
||||
mirror_url = result.get('mirror_url', '')
|
||||
|
||||
if not mirror_url:
|
||||
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
|
||||
continue
|
||||
|
||||
# Use extension from this result if available
|
||||
extension = result.get('extension', 'pdf')
|
||||
if extension and not extension.startswith('.'):
|
||||
extension = f".{extension}"
|
||||
elif not extension:
|
||||
extension = '.pdf'
|
||||
|
||||
output_path = Path(output_dir) / (filename + extension)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
|
||||
|
||||
download_func = self.download_from_mirror
|
||||
if download_func is None:
|
||||
return False, "Download function not available"
|
||||
|
||||
download_callable = cast(Callable[[str, str], bool], download_func)
|
||||
|
||||
def download_wrapper():
|
||||
return download_callable(mirror_url, str(output_path))
|
||||
|
||||
# Download (in thread)
|
||||
try:
|
||||
success = await loop.run_in_executor(None, download_wrapper)
|
||||
|
||||
if success:
|
||||
# Validate downloaded file is not HTML (common Libgen issue)
|
||||
if output_path.exists():
|
||||
try:
|
||||
with open(output_path, 'rb') as f:
|
||||
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
|
||||
if '<!doctype' in file_start or '<html' in file_start:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
|
||||
output_path.unlink() # Delete the HTML file
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
|
||||
return True, str(output_path)
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
|
||||
continue
|
||||
|
||||
return False, f"All {len(results)} mirrors failed"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
|
||||
return False, f"Libgen download failed: {str(e)}"
|
||||
|
||||
async def download_libgen_selection(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Download a specific Libgen result with optional fallbacks."""
|
||||
|
||||
if not isinstance(selected, dict):
|
||||
return False, "Selected result must be a dictionary"
|
||||
|
||||
ordered_results: List[Dict[str, Any]] = [selected]
|
||||
if remaining:
|
||||
for item in remaining:
|
||||
if isinstance(item, dict) and item is not selected:
|
||||
ordered_results.append(item)
|
||||
|
||||
method: Dict[str, Any] = {
|
||||
'type': 'libgen',
|
||||
'isbn': selected.get('isbn', '') or '',
|
||||
'title': selected.get('title', '') or '',
|
||||
'author': selected.get('author', '') or '',
|
||||
'results': ordered_results,
|
||||
}
|
||||
|
||||
return await self.download_book(method, output_dir)
|
||||
|
||||
def download_libgen_selection_sync(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Synchronous helper for downloading a Libgen selection."""
|
||||
|
||||
async def _run() -> Tuple[bool, str]:
|
||||
return await self.download_libgen_selection(selected, remaining, output_dir)
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
asyncio.set_event_loop(loop)
|
||||
return loop.run_until_complete(_run())
|
||||
finally:
|
||||
loop.close()
|
||||
asyncio.set_event_loop(None)
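For synchronous callers, a single selected result can be passed straight through; the keys shown are the ones this class reads from Libgen results, with placeholder values:

downloader = UnifiedBookDownloader()
selected = {
    'title': 'Example Title',
    'isbn': '9780000000000',
    'author': 'Example Author',
    'mirror_url': 'http://libgen.example/get',  # placeholder mirror
    'extension': 'pdf',
}
ok, path_or_error = downloader.download_libgen_selection_sync(selected, remaining=[], output_dir='/tmp/books')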
|
||||
|
||||
def _download_file(self, url: str, output_path: str) -> bool:
|
||||
"""Download a file from URL."""
|
||||
try:
|
||||
response = requests.get(url, stream=True, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(output_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
|
||||
return False
|
||||
|
||||
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
|
||||
"""Borrow a book from Archive.org and download pages as PDF.
|
||||
|
||||
This follows the exact process from archive_client.py:
|
||||
1. Login with credentials
|
||||
2. Call loan() to create 14-day borrow
|
||||
3. Get book info (extract page URLs)
|
||||
4. Download all pages as images
|
||||
5. Merge images into searchable PDF
|
||||
|
||||
Returns tuple of (success: bool, filepath/message: str)
|
||||
"""
|
||||
try:
|
||||
from helper.archive_client import login, loan, get_book_infos, download
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
|
||||
session = login(email, password)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
|
||||
# Call loan to create the 14-day borrow
|
||||
session = loan(session, book_id, verbose=True)
|
||||
|
||||
# If we get here, borrowing succeeded
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
|
||||
|
||||
# Now get the book info (page URLs and metadata)
|
||||
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
|
||||
# Try both URL formats: with /borrow and without
|
||||
book_urls = [
|
||||
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
|
||||
f"https://archive.org/details/{book_id}" # Fallback to details page
|
||||
]
|
||||
|
||||
title = None
|
||||
links = None
|
||||
metadata = None
|
||||
last_error = None
|
||||
|
||||
for book_url in book_urls:
|
||||
try:
|
||||
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
|
||||
response = session.get(book_url, timeout=10)
|
||||
|
||||
# Log response status
|
||||
if response.status_code != 200:
|
||||
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
|
||||
# Continue to try next URL
|
||||
continue
|
||||
|
||||
# Try to parse the response
|
||||
title, links, metadata = get_book_infos(session, book_url)
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
|
||||
last_error = e
|
||||
continue
|
||||
|
||||
if links is None:
|
||||
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
|
||||
# Borrow extraction failed - return False
|
||||
return False, "Could not extract borrowed book pages"
|
||||
|
||||
# Create temporary directory for images
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
|
||||
|
||||
try:
|
||||
# Download all pages (uses thread pool)
|
||||
images = download(
|
||||
session=session,
|
||||
n_threads=10,
|
||||
directory=temp_dir,
|
||||
links=links,
|
||||
scale=3, # Default resolution
|
||||
book_id=book_id
|
||||
)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
|
||||
|
||||
# Try to merge pages into PDF
|
||||
try:
|
||||
import img2pdf
|
||||
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
|
||||
|
||||
# Prepare PDF metadata
|
||||
pdfmeta = {}
|
||||
if metadata:
|
||||
if "title" in metadata:
|
||||
pdfmeta["title"] = metadata["title"]
|
||||
if "creator" in metadata:
|
||||
pdfmeta["author"] = metadata["creator"]
|
||||
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
|
||||
pdfmeta["creationdate"] = None # Avoid timezone issues
|
||||
|
||||
# Convert images to PDF
|
||||
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
|
||||
if not pdf_content:
|
||||
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
|
||||
return False, "Failed to convert pages to PDF"
|
||||
|
||||
# Save the PDF
|
||||
pdf_filename = f"{title}.pdf" if title else "book.pdf"
|
||||
pdf_path = Path(output_dir) / pdf_filename
|
||||
|
||||
# Handle duplicate filenames
|
||||
i = 1
|
||||
while pdf_path.exists():
|
||||
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
|
||||
i += 1
|
||||
|
||||
with open(pdf_path, 'wb') as f:
|
||||
f.write(pdf_content)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
|
||||
|
||||
return True, str(pdf_path)
|
||||
|
||||
except ImportError:
|
||||
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
|
||||
|
||||
# Create JPG collection directory
|
||||
if not title:
|
||||
title = f"book_{book_id}"
|
||||
jpg_dir = Path(output_dir) / title
|
||||
i = 1
|
||||
while jpg_dir.exists():
|
||||
jpg_dir = Path(output_dir) / f"{title}({i})"
|
||||
i += 1
|
||||
|
||||
# Move temporary directory to final location
|
||||
shutil.move(temp_dir, str(jpg_dir))
|
||||
temp_dir = None # Mark as already moved
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
|
||||
return True, str(jpg_dir)
|
||||
|
||||
finally:
|
||||
# Clean up temporary directory if it still exists
|
||||
if temp_dir and Path(temp_dir).exists():
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
except SystemExit:
|
||||
# loan() function calls sys.exit on failure - catch it
|
||||
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
|
||||
return False, "Book could not be borrowed (may not be available for borrowing)"
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Borrow failed: {str(e)}"
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the session."""
|
||||
self.session.close()
|
||||
492
helper/utils.py
Normal file
@@ -0,0 +1,492 @@
|
||||
"""General-purpose helpers used across the downlow CLI."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
import ffmpeg
|
||||
import base64
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass, field
|
||||
from fnmatch import fnmatch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import helper.utils_constant
|
||||
|
||||
try:
|
||||
import cbor2
|
||||
except ImportError:
|
||||
cbor2 = None # type: ignore
|
||||
|
||||
CHUNK_SIZE = 1024 * 1024 # 1 MiB
|
||||
_format_logger = logging.getLogger(__name__)
|
||||
def ensure_directory(path: Path) -> None:
|
||||
"""Ensure *path* exists as a directory."""
|
||||
try:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as exc: # pragma: no cover - surfaced to caller
|
||||
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
|
||||
def unique_path(path: Path) -> Path:
|
||||
"""Return a unique path by appending " (n)" if needed."""
|
||||
if not path.exists():
|
||||
return path
|
||||
stem = path.stem
|
||||
suffix = path.suffix
|
||||
parent = path.parent
|
||||
counter = 1
|
||||
while True:
|
||||
candidate = parent / f"{stem} ({counter}){suffix}"
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
counter += 1
|
||||
|
||||
def sanitize_metadata_value(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if not isinstance(value, str):
|
||||
value = str(value)
|
||||
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
|
||||
if not value:
|
||||
return None
|
||||
return value
|
||||
def unique_preserve_order(values: Iterable[str]) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
ordered: list[str] = []
|
||||
for value in values:
|
||||
if value not in seen:
|
||||
seen.add(value)
|
||||
ordered.append(value)
|
||||
return ordered
|
||||
def sha256_file(file_path: Path) -> str:
|
||||
"""Return the SHA-256 hex digest of *path*."""
|
||||
hasher = hashlib.sha256()
|
||||
with file_path.open('rb') as handle:
|
||||
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
|
||||
hasher.update(chunk)
|
||||
return hasher.hexdigest()
|
||||
|
||||
|
||||
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
|
||||
"""Create a .metadata sidecar file with JSON metadata.
|
||||
|
||||
The metadata dict should contain title. If not present, it will be derived from
|
||||
the filename. This ensures the .metadata file can be matched during batch import.
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
metadata: Dictionary of metadata to save
|
||||
"""
|
||||
if not metadata:
|
||||
return
|
||||
file_name = file_path.stem
|
||||
file_ext = file_path.suffix.lower()
|
||||
# Ensure metadata has a title field that matches the filename (without extension)
|
||||
# This allows the sidecar to be matched and imported properly during batch import
|
||||
if 'title' not in metadata or not metadata.get('title'):
|
||||
metadata['title'] = file_name
|
||||
metadata['hash'] = sha256_file(file_path)
|
||||
metadata['size'] = Path(file_path).stat().st_size
|
||||
format_found = False
|
||||
for mime_type, ext_map in helper.utils_constant.mime_maps.items():
|
||||
for key, info in ext_map.items():
|
||||
if info.get("ext") == file_ext:
|
||||
metadata['type'] = mime_type
|
||||
format_found = True
|
||||
break
|
||||
if format_found:
|
||||
break
|
||||
else:
|
||||
metadata['type'] = 'unknown'
|
||||
try:
metadata.update(ffprobe(str(file_path)))
except Exception:
# Non-media files (documents, archives) cannot be probed by ffmpeg; keep the basic metadata only
pass
|
||||
|
||||
|
||||
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
|
||||
try:
|
||||
with open(metadata_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
||||
except OSError as exc:
|
||||
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
|
||||
|
||||
def create_tags_sidecar(file_path: Path, tags: set) -> None:
|
||||
"""Create a .tags sidecar file with tags (one per line).
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
tags: Set of tag strings
|
||||
"""
|
||||
if not tags:
|
||||
return
|
||||
|
||||
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
|
||||
try:
|
||||
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||
for tag in sorted(tags):
|
||||
f.write(f"{tag}\n")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
|
||||
|
||||
|
||||
def ffprobe(file_path: str) -> dict:
|
||||
probe = ffmpeg.probe(file_path)
|
||||
metadata = {}
|
||||
|
||||
# Format-level info
|
||||
fmt = probe.get("format", {})
|
||||
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
|
||||
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
|
||||
metadata["format_name"] = fmt.get("format_name", None)
|
||||
|
||||
# Stream-level info
|
||||
for stream in probe.get("streams", []):
|
||||
codec_type = stream.get("codec_type")
|
||||
if codec_type == "audio":
|
||||
metadata["audio_codec"] = stream.get("codec_name")
|
||||
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
|
||||
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
|
||||
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
|
||||
elif codec_type == "video":
|
||||
metadata["video_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
elif codec_type == "image":
|
||||
metadata["image_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
|
||||
return metadata
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CBOR Utilities - Consolidated from cbor.py
|
||||
# ============================================================================
|
||||
"""CBOR utilities backed by the `cbor2` library."""
|
||||
|
||||
|
||||
def decode_cbor(data: bytes) -> Any:
|
||||
"""Decode *data* from CBOR into native Python objects."""
|
||||
if not data:
|
||||
return None
|
||||
if cbor2 is None:
|
||||
raise ImportError("cbor2 library is required for CBOR decoding")
|
||||
return cbor2.loads(data)
|
||||
|
||||
|
||||
def jsonify(value: Any) -> Any:
|
||||
"""Convert *value* into a JSON-friendly structure."""
|
||||
if isinstance(value, dict):
|
||||
return {str(key): jsonify(val) for key, val in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [jsonify(item) for item in value]
|
||||
if isinstance(value, bytes):
|
||||
return {"__bytes__": base64.b64encode(value).decode("ascii")}
|
||||
return value
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Format Utilities - Consolidated from format_utils.py
|
||||
# ============================================================================
|
||||
"""Formatting utilities for displaying metadata consistently across the application."""
|
||||
|
||||
|
||||
def format_bytes(bytes_value) -> str:
|
||||
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
|
||||
|
||||
Args:
|
||||
bytes_value: Size in bytes (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1.5 MB' or '756 MB'
|
||||
"""
|
||||
if bytes_value is None or (isinstance(bytes_value, (int, float)) and bytes_value <= 0):
|
||||
return "0 B"
|
||||
|
||||
if isinstance(bytes_value, (int, float)):
|
||||
for unit in ("B", "KB", "MB", "GB", "TB"):
|
||||
if bytes_value < 1024:
|
||||
if unit == "B":
|
||||
return f"{int(bytes_value)} {unit}"
|
||||
return f"{bytes_value:.1f} {unit}"
|
||||
bytes_value /= 1024
|
||||
return f"{bytes_value:.1f} PB"
|
||||
return str(bytes_value)
|
||||
|
||||
|
||||
def format_duration(seconds) -> str:
"""Format duration in seconds to a compact time string (e.g., '1:23:45', '5:30', '45s').
|
||||
|
||||
Args:
|
||||
seconds: Duration in seconds (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1:23:45' or '5:30'
|
||||
"""
|
||||
if seconds is None or seconds == '':
|
||||
return "N/A"
|
||||
|
||||
if isinstance(seconds, str):
|
||||
try:
|
||||
seconds = float(seconds)
|
||||
except ValueError:
|
||||
return str(seconds)
|
||||
|
||||
if not isinstance(seconds, (int, float)):
|
||||
return str(seconds)
|
||||
|
||||
total_seconds = int(seconds)
|
||||
if total_seconds < 0:
|
||||
return "N/A"
|
||||
|
||||
hours = total_seconds // 3600
|
||||
minutes = (total_seconds % 3600) // 60
|
||||
secs = total_seconds % 60
|
||||
|
||||
if hours > 0:
|
||||
return f"{hours}:{minutes:02d}:{secs:02d}"
|
||||
elif minutes > 0:
|
||||
return f"{minutes}:{secs:02d}"
|
||||
else:
|
||||
return f"{secs}s"
|
||||
|
||||
|
||||
def format_timestamp(timestamp_str) -> str:
|
||||
"""Format ISO timestamp to readable format.
|
||||
|
||||
Args:
|
||||
timestamp_str: ISO format timestamp string or None
|
||||
|
||||
Returns:
|
||||
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
|
||||
"""
|
||||
if not timestamp_str:
|
||||
return "N/A"
|
||||
|
||||
try:
|
||||
# Handle ISO format timestamps
|
||||
if isinstance(timestamp_str, str):
|
||||
# Try parsing ISO format
|
||||
if 'T' in timestamp_str:
|
||||
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
||||
else:
|
||||
# Try other common formats
|
||||
dt = datetime.fromisoformat(timestamp_str)
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except Exception as e:
|
||||
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
|
||||
|
||||
return str(timestamp_str)
|
||||
|
||||
|
||||
def format_metadata_value(key: str, value) -> str:
|
||||
"""Format a metadata value based on its key for display.
|
||||
|
||||
This is the central formatting rule for all metadata display.
|
||||
|
||||
Args:
|
||||
key: Metadata field name
|
||||
value: Value to format
|
||||
|
||||
Returns:
|
||||
Formatted string for display
|
||||
"""
|
||||
if value is None or value == '':
|
||||
return "N/A"
|
||||
|
||||
# Apply field-specific formatting
|
||||
if key in ('size', 'file_size'):
|
||||
return format_bytes(value)
|
||||
elif key in ('duration', 'length'):
|
||||
return format_duration(value)
|
||||
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
|
||||
return format_timestamp(value)
|
||||
else:
|
||||
return str(value)
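A few quick checks of the expected output of these formatters (inputs chosen for illustration):

assert format_bytes(1536) == '1.5 KB'
assert format_duration(3725) == '1:02:05'
assert format_metadata_value('size', 1536) == '1.5 KB'
assert format_metadata_value('duration', 90) == '1:30'
assert format_metadata_value('time_imported', '2025-10-28T19:36:01Z') == '2025-10-28 19:36:01'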
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Link Utilities - Consolidated from link_utils.py
|
||||
# ============================================================================
|
||||
"""Link utilities - Extract and process URLs from various sources."""
|
||||
|
||||
|
||||
def extract_link_from_args(args: Iterable[str]) -> Any | None:
|
||||
"""Extract HTTP/HTTPS URL from command arguments.
|
||||
|
||||
Args:
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
args_list = list(args) if not isinstance(args, (list, tuple)) else args
|
||||
if not args_list:
|
||||
return None
|
||||
|
||||
potential_link = str(args_list[0])
|
||||
if potential_link.startswith(('http://', 'https://')):
|
||||
return potential_link
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def extract_link_from_result(result: Any) -> Any | None:
|
||||
"""Extract URL from a result object (dict or object with attributes).
|
||||
|
||||
Args:
|
||||
result: Result object from pipeline (dict or object)
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
return result.get('url') or result.get('link') or result.get('href')
|
||||
|
||||
return (
|
||||
getattr(result, 'url', None) or
|
||||
getattr(result, 'link', None) or
|
||||
getattr(result, 'href', None)
|
||||
)
|
||||
|
||||
|
||||
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
|
||||
"""Extract link from args or result (args take priority).
|
||||
|
||||
Args:
|
||||
result: Pipeline result object
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
# Try args first
|
||||
link = extract_link_from_args(args)
|
||||
if link:
|
||||
return link
|
||||
|
||||
# Fall back to result
|
||||
return extract_link_from_result(result)
|
||||
|
||||
|
||||
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
|
||||
"""Get API key from config with fallback support.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
service: Service name for logging
|
||||
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
|
||||
|
||||
Returns:
|
||||
API key if found and not empty, None otherwise
|
||||
"""
|
||||
try:
|
||||
parts = key_path.split('.')
|
||||
value = config
|
||||
for part in parts:
|
||||
if isinstance(value, dict):
|
||||
value = value.get(part)
|
||||
else:
|
||||
return None
|
||||
|
||||
if isinstance(value, str):
|
||||
return value.strip() or None
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
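Example of the dot-notation lookup; the config contents are illustrative and the key path matches the docstring example:

config = {'Debrid': {'All-debrid': ' abc123 '}}
assert get_api_key(config, 'alldebrid', 'Debrid.All-debrid') == 'abc123'
assert get_api_key(config, 'alldebrid', 'Debrid.Missing') is None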
|
||||
|
||||
|
||||
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
|
||||
"""Add direct link information to result object.
|
||||
|
||||
Args:
|
||||
result: Result object to modify (dict or object)
|
||||
direct_link: The unlocked/direct URL
|
||||
original_link: The original restricted URL
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
result['direct_link'] = direct_link
|
||||
result['original_link'] = original_link
|
||||
else:
|
||||
setattr(result, 'direct_link', direct_link)
|
||||
setattr(result, 'original_link', original_link)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# URL Policy Resolution - Consolidated from url_parser.py
|
||||
# ============================================================================
|
||||
"""URL policy resolution for downlow workflows."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UrlPolicy:
|
||||
"""Describe how a URL should be handled by download and screenshot flows."""
|
||||
|
||||
skip_download: bool = False
|
||||
skip_metadata: bool = False
|
||||
force_screenshot: bool = False
|
||||
extra_tags: list[str] = field(default_factory=list)
|
||||
|
||||
def apply_tags(self, sources: Iterable[str]) -> list[str]:
|
||||
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
|
||||
for value in sources:
|
||||
text = str(value).strip()
|
||||
if text:
|
||||
tags.append(text)
|
||||
return tags
|
||||
|
||||
|
||||
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
|
||||
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
|
||||
if not pattern:
|
||||
return None
|
||||
skip_download = bool(rule.get("skip_download"))
|
||||
skip_metadata = bool(rule.get("skip_metadata"))
|
||||
force_screenshot = bool(rule.get("force_screenshot"))
|
||||
extra_tags_raw = rule.get("extra_tags")
|
||||
if isinstance(extra_tags_raw, str):
|
||||
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
|
||||
elif isinstance(extra_tags_raw, (list, tuple, set)):
|
||||
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
|
||||
else:
|
||||
extra_tags = []
|
||||
return {
|
||||
"pattern": pattern,
|
||||
"skip_download": skip_download,
|
||||
"skip_metadata": skip_metadata,
|
||||
"force_screenshot": force_screenshot,
|
||||
"extra_tags": extra_tags,
|
||||
}
|
||||
|
||||
|
||||
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
|
||||
policies_raw = config.get("url_policies")
|
||||
if not policies_raw:
|
||||
return UrlPolicy()
|
||||
if not isinstance(policies_raw, list):
|
||||
return UrlPolicy()
|
||||
parsed = urlparse(url)
|
||||
subject = f"{parsed.netloc}{parsed.path}"
|
||||
host = parsed.netloc
|
||||
resolved = UrlPolicy()
|
||||
for rule_raw in policies_raw:
|
||||
if not isinstance(rule_raw, dict):
|
||||
continue
|
||||
rule = _normalise_rule(rule_raw)
|
||||
if rule is None:
|
||||
continue
|
||||
pattern = rule["pattern"]
|
||||
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
|
||||
continue
|
||||
if rule["skip_download"]:
|
||||
resolved.skip_download = True
|
||||
if rule["skip_metadata"]:
|
||||
resolved.skip_metadata = True
|
||||
if rule["force_screenshot"]:
|
||||
resolved.force_screenshot = True
|
||||
if rule["extra_tags"]:
|
||||
for tag in rule["extra_tags"]:
|
||||
if tag not in resolved.extra_tags:
|
||||
resolved.extra_tags.append(tag)
|
||||
return resolved
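A configuration sketch showing how a rule is matched; the pattern and tags are assumptions for illustration:

config = {
    'url_policies': [
        {'pattern': '*.example.com*', 'skip_download': True, 'extra_tags': 'source:example, nsfw:no'},
    ]
}
policy = resolve_url_policy(config, 'https://media.example.com/watch?v=1')
# policy.skip_download is True; policy.apply_tags(['title:demo']) ->
# ['source:example', 'nsfw:no', 'title:demo']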
|
||||
79
helper/utils_constant.py
Normal file
@@ -0,0 +1,79 @@
|
||||
mime_maps = {
|
||||
"image": {
|
||||
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
|
||||
"png": { "ext": ".png", "mimes": ["image/png"] },
|
||||
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
|
||||
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
|
||||
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
|
||||
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
|
||||
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
|
||||
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
|
||||
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
|
||||
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
|
||||
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
|
||||
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
|
||||
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
|
||||
},
|
||||
"image_sequence": {
|
||||
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
|
||||
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
|
||||
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
|
||||
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
|
||||
},
|
||||
"video": {
|
||||
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
|
||||
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
|
||||
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
|
||||
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
|
||||
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
|
||||
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
|
||||
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
|
||||
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
|
||||
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
|
||||
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
|
||||
},
|
||||
"audio": {
|
||||
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
||||
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
||||
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
||||
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
||||
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
||||
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
||||
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
|
||||
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
|
||||
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
|
||||
},
|
||||
"document": {
|
||||
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
|
||||
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
|
||||
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
|
||||
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
|
||||
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
|
||||
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
|
||||
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
|
||||
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
|
||||
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
|
||||
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
|
||||
},
|
||||
"archive": {
|
||||
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
|
||||
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
|
||||
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
|
||||
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
|
||||
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
|
||||
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
|
||||
},
|
||||
"project": {
|
||||
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
|
||||
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
|
||||
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
|
||||
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
|
||||
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
|
||||
},
|
||||
"other": {
|
||||
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
|
||||
"json": { "ext": ".json", "mimes": ["application/json"] },
|
||||
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
|
||||
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
|
||||
}
|
||||
}
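A minimal lookup sketch over this table (it mirrors how create_metadata_sidecar in helper/utils.py resolves a category from a file extension; the helper name is ours, not part of this commit):

def category_for_extension(ext: str) -> str:
    ext = ext.lower()
    for category, entries in mime_maps.items():
        for info in entries.values():
            if info.get('ext') == ext:
                return category
    return 'unknown'

# category_for_extension('.flac') -> 'audio'; category_for_extension('.cbz') -> 'archive'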
|
||||
655
helper/worker_manager.py
Normal file
@@ -0,0 +1,655 @@
|
||||
"""Worker task management with persistent database storage.
|
||||
|
||||
Manages worker tasks for downloads, searches, imports, etc. with automatic
|
||||
persistence to database and optional auto-refresh callbacks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List, Callable
|
||||
from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
|
||||
from .local_library import LocalLibraryDB
|
||||
from helper.logger import log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Worker:
|
||||
"""Represents a single worker task with state management."""
|
||||
|
||||
def __init__(self, worker_id: str, worker_type: str, title: str = "",
|
||||
description: str = "", manager: Optional['WorkerManager'] = None):
|
||||
"""Initialize a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
worker_type: Type of work (e.g., 'download', 'search', 'import')
|
||||
title: Human-readable title
|
||||
description: Detailed description
|
||||
manager: Reference to parent WorkerManager for state updates
|
||||
"""
|
||||
self.id = worker_id
|
||||
self.type = worker_type
|
||||
self.title = title or worker_type
|
||||
self.description = description
|
||||
self.manager = manager
|
||||
self.status = "running"
|
||||
self.progress = ""
|
||||
self.details = ""
|
||||
self.error_message = ""
|
||||
self.result = "pending"
|
||||
self._stdout_buffer = []
|
||||
self._steps_buffer = []
|
||||
|
||||
def log_step(self, step_text: str) -> None:
|
||||
"""Log a step for this worker.
|
||||
|
||||
Args:
|
||||
step_text: Text describing the step
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.log_step(self.id, step_text)
|
||||
else:
|
||||
logger.info(f"[{self.id}] {step_text}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error logging step for worker {self.id}: {e}")
|
||||
|
||||
def append_stdout(self, text: str) -> None:
|
||||
"""Append text to stdout log.
|
||||
|
||||
Args:
|
||||
text: Text to append
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.id, text)
|
||||
else:
|
||||
self._stdout_buffer.append(text)
|
||||
except Exception as e:
|
||||
logger.error(f"Error appending stdout for worker {self.id}: {e}")
|
||||
|
||||
def get_stdout(self) -> str:
|
||||
"""Get all stdout for this worker.
|
||||
|
||||
Returns:
|
||||
Complete stdout text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_stdout(self.id)
|
||||
else:
|
||||
return "\n".join(self._stdout_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting stdout for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def get_steps(self) -> str:
|
||||
"""Get all steps for this worker.
|
||||
|
||||
Returns:
|
||||
Complete steps text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_steps(self.id)
|
||||
else:
|
||||
return "\n".join(self._steps_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||||
"""Update worker progress.
|
||||
|
||||
Args:
|
||||
progress: Progress string (e.g., "50%")
|
||||
details: Additional details
|
||||
"""
|
||||
self.progress = progress
|
||||
self.details = details
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.update_worker(self.id, progress, details)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating worker {self.id}: {e}")
|
||||
|
||||
def finish(self, result: str = "completed", message: str = "") -> None:
|
||||
"""Mark worker as finished.
|
||||
|
||||
Args:
|
||||
result: Result status ('completed', 'error', 'cancelled')
|
||||
message: Result message/error details
|
||||
"""
|
||||
self.result = result
|
||||
self.status = "finished"
|
||||
self.error_message = message
|
||||
try:
|
||||
if self.manager:
|
||||
# Flush and disable logging handler before marking finished
|
||||
self.manager.disable_logging_for_worker(self.id)
|
||||
# Then mark as finished in database
|
||||
self.manager.finish_worker(self.id, result, message)
|
||||
except Exception as e:
|
||||
logger.error(f"Error finishing worker {self.id}: {e}")
|
||||
|
||||
|
||||
class WorkerLoggingHandler(logging.StreamHandler):
|
||||
"""Custom logging handler that captures logs for a worker."""
|
||||
|
||||
def __init__(self, worker_id: str, db: LocalLibraryDB,
|
||||
manager: Optional['WorkerManager'] = None,
|
||||
buffer_size: int = 50):
|
||||
"""Initialize the handler.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to capture logs for
|
||||
db: Reference to LocalLibraryDB for storing logs
|
||||
manager: Optional WorkerManager used to route captured logs
buffer_size: Number of logs to buffer before flushing to DB
|
||||
"""
|
||||
super().__init__()
|
||||
self.worker_id = worker_id
|
||||
self.db = db
|
||||
self.manager = manager
|
||||
self.buffer_size = buffer_size
|
||||
self.buffer = []
|
||||
self._lock = Lock()
|
||||
|
||||
# Set a format that includes timestamp and level
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
self.setFormatter(formatter)
|
||||
|
||||
def emit(self, record):
|
||||
"""Emit a log record."""
|
||||
try:
|
||||
# Try to format the record normally
|
||||
try:
|
||||
msg = self.format(record)
|
||||
except (TypeError, ValueError):
|
||||
# If formatting fails (e.g., %d format with non-int arg),
|
||||
# build message manually without calling getMessage()
|
||||
try:
|
||||
# Try to format with args if possible
|
||||
if record.args:
|
||||
msg = record.msg % record.args
|
||||
else:
|
||||
msg = record.msg
|
||||
except (TypeError, ValueError):
|
||||
# If that fails too, just use the raw message string
|
||||
msg = str(record.msg)
|
||||
|
||||
# Add timestamp and level if not already in message
|
||||
|
||||
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
|
||||
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
|
||||
|
||||
with self._lock:
|
||||
self.buffer.append(msg)
|
||||
|
||||
# Flush to DB when buffer reaches size
|
||||
if len(self.buffer) >= self.buffer_size:
|
||||
self._flush()
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
def _flush(self):
|
||||
"""Flush buffered logs to database."""
|
||||
if self.buffer:
|
||||
log_text = '\n'.join(self.buffer)
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
else:
|
||||
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
except Exception as e:
|
||||
# If we can't write to DB, at least log it
|
||||
log(f"Error flushing worker logs: {e}")
|
||||
self.buffer = []
|
||||
|
||||
def flush(self):
|
||||
"""Flush any buffered records."""
|
||||
with self._lock:
|
||||
self._flush()
|
||||
super().flush()
|
||||
|
||||
def close(self):
|
||||
"""Close the handler."""
|
||||
self.flush()
|
||||
super().close()
|
||||
|
||||
|
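
# Illustrative sketch (not part of the original commit): attaching the handler
# by hand. In normal use WorkerManager.enable_logging_for_worker() below does
# this wiring; `db` is assumed to be an existing LocalLibraryDB instance.
def _example_attach_handler(worker_id: str, db: LocalLibraryDB) -> WorkerLoggingHandler:
    handler = WorkerLoggingHandler(worker_id, db, buffer_size=25)  # smaller buffer => more frequent DB flushes
    logging.getLogger().addHandler(handler)  # capture records from every logger
    return handler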


class WorkerManager:
    """Manages persistent worker tasks with auto-refresh capability."""

    def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
        """Initialize the worker manager.

        Args:
            library_root: Root directory for the local library database
            auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
        """
        self.library_root = Path(library_root)
        self.db = LocalLibraryDB(library_root)
        self.auto_refresh_interval = auto_refresh_interval
        self.refresh_callbacks: List[Callable] = []
        self.refresh_thread: Optional[Thread] = None
        self._stop_refresh = False
        self._lock = Lock()
        self.worker_handlers: Dict[str, WorkerLoggingHandler] = {}  # Track active handlers
        self._worker_last_step: Dict[str, str] = {}

    def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
        """Register a callback to be called on worker updates.

        Args:
            callback: Function that receives list of active workers
        """
        with self._lock:
            self.refresh_callbacks.append(callback)

    def expire_running_workers(
        self,
        older_than_seconds: int = 300,
        worker_id_prefix: Optional[str] = None,
        reason: Optional[str] = None,
        status: str = "error",
    ) -> int:
        """Mark stale running workers as finished.

        Args:
            older_than_seconds: Idle threshold before expiring.
            worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
            reason: Error message if none already exists.
            status: New status to apply.

        Returns:
            Count of workers updated.
        """
        try:
            return self.db.expire_running_workers(
                older_than_seconds=older_than_seconds,
                status=status,
                reason=reason,
                worker_id_prefix=worker_id_prefix,
            )
        except Exception as exc:
            logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
            return 0

    def remove_refresh_callback(self, callback: Callable) -> None:
        """Remove a refresh callback.

        Args:
            callback: The callback function to remove
        """
        with self._lock:
            if callback in self.refresh_callbacks:
                self.refresh_callbacks.remove(callback)

    def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
        """Enable logging capture for a worker.

        Creates a logging handler that captures all logs for this worker.

        Args:
            worker_id: ID of the worker to capture logs for

        Returns:
            The logging handler that was created, or None if there was an error
        """
        try:
            handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
            with self._lock:
                self.worker_handlers[worker_id] = handler

            # Add the handler to the root logger so it captures all logs
            root_logger = logging.getLogger()
            root_logger.addHandler(handler)
            root_logger.setLevel(logging.DEBUG)  # Capture all levels

            logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
            return handler
        except Exception as e:
            logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
            return None

    def disable_logging_for_worker(self, worker_id: str) -> None:
        """Disable logging capture for a worker and flush any pending logs.

        Args:
            worker_id: ID of the worker to stop capturing logs for
        """
        try:
            with self._lock:
                handler = self.worker_handlers.pop(worker_id, None)

            if handler:
                # Flush and close the handler
                handler.flush()
                handler.close()

                # Remove from root logger
                root_logger = logging.getLogger()
                root_logger.removeHandler(handler)

                logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
        except Exception as e:
            logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
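
    # Illustrative sketch (not part of the original commit): bracketing a task
    # with per-worker log capture using the two methods above. `run_task` is a
    # hypothetical zero-argument callable supplied by the caller.
    def _example_captured_task(self, worker_id: str, run_task: Callable[[], None]) -> None:
        handler = self.enable_logging_for_worker(worker_id)
        try:
            run_task()  # anything this logs is buffered into the worker's log channel
        finally:
            if handler is not None:
                self.disable_logging_for_worker(worker_id)  # flush + detach, even on error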

    def track_worker(self, worker_id: str, worker_type: str, title: str = "",
                     description: str = "", total_steps: int = 0,
                     pipe: Optional[str] = None) -> bool:
        """Start tracking a new worker.

        Args:
            worker_id: Unique identifier for the worker
            worker_type: Type of worker (e.g., 'download', 'search', 'import')
            title: Worker title/name
            description: Worker description
            total_steps: Total number of steps for progress tracking
            pipe: Text of the originating pipe/prompt, if any

        Returns:
            True if worker was inserted successfully
        """
        try:
            result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
            if result > 0:
                logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
                self._start_refresh_if_needed()
                return True
            return False
        except Exception as e:
            logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
            return False

    def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
                      details: str = "", error: str = "") -> bool:
        """Update worker progress and status.

        Args:
            worker_id: Unique identifier for the worker
            progress: Progress percentage (0-100)
            current_step: Current step description
            details: Additional details
            error: Error message if any

        Returns:
            True if update was successful
        """
        try:
            kwargs = {}
            if progress > 0:
                kwargs['progress'] = progress
            if current_step:
                kwargs['current_step'] = current_step
            if details:
                kwargs['description'] = details
            if error:
                kwargs['error_message'] = error

            if kwargs:
                kwargs['last_updated'] = datetime.now().isoformat()
                if 'current_step' in kwargs and kwargs['current_step']:
                    self._worker_last_step[worker_id] = str(kwargs['current_step'])
                return self.db.update_worker(worker_id, **kwargs)
            return True
        except Exception as e:
            logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
            return False

    def finish_worker(self, worker_id: str, result: str = "completed",
                      error_msg: str = "", result_data: str = "") -> bool:
        """Mark a worker as finished.

        Args:
            worker_id: Unique identifier for the worker
            result: Result status ('completed', 'error', 'cancelled')
            error_msg: Error message if any
            result_data: Result data as JSON string

        Returns:
            True if update was successful
        """
        try:
            kwargs = {
                'status': result,
                'completed_at': datetime.now().isoformat()
            }
            if error_msg:
                kwargs['error_message'] = error_msg
            if result_data:
                kwargs['result_data'] = result_data

            success = self.db.update_worker(worker_id, **kwargs)
            logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
            self._worker_last_step.pop(worker_id, None)
            return success
        except Exception as e:
            logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
            return False
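
    # Illustrative sketch (not part of the original commit): driving a worker's
    # full lifecycle through the three methods above. The id, step names, and
    # result payload are made up for the example.
    def _example_tracked_download(self) -> None:
        wid = "example_download_001"
        if not self.track_worker(wid, "download", title="Example download"):
            return
        self.update_worker(wid, progress=25.0, current_step="fetching metadata")
        self.update_worker(wid, progress=100.0, current_step="done")
        self.finish_worker(wid, result="completed", result_data='{"files": 1}')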

    def get_active_workers(self) -> List[Dict[str, Any]]:
        """Get all active (running) workers.

        Returns:
            List of active worker dictionaries
        """
        try:
            return self.db.get_active_workers()
        except Exception as e:
            logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
            return []

    def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Get all finished workers (completed, errored, or cancelled).

        Args:
            limit: Maximum number of workers to retrieve

        Returns:
            List of finished worker dictionaries
        """
        try:
            all_workers = self.db.get_all_workers(limit=limit)
            # Filter to only finished workers
            finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
            return finished
        except Exception as e:
            logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
            return []

    def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific worker's data.

        Args:
            worker_id: Unique identifier for the worker

        Returns:
            Worker data or None if not found
        """
        try:
            return self.db.get_worker(worker_id)
        except Exception as e:
            logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
            return None

    def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
        """Fetch recorded worker timeline events."""
        return self.db.get_worker_events(worker_id, limit)

    def log_step(self, worker_id: str, step_text: str) -> bool:
        """Log a step to a worker's step history.

        Args:
            worker_id: Unique identifier for the worker
            step_text: Step description to log

        Returns:
            True if successful
        """
        try:
            success = self.db.append_worker_steps(worker_id, step_text)
            if success:
                self._worker_last_step[worker_id] = step_text
            return success
        except Exception as e:
            logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
            return False

    def _get_last_step(self, worker_id: str) -> Optional[str]:
        """Return the most recent step description for a worker."""
        return self._worker_last_step.get(worker_id)

    def get_steps(self, worker_id: str) -> str:
        """Get step logs for a worker.

        Args:
            worker_id: Unique identifier for the worker

        Returns:
            Steps text or empty string if not found
        """
        try:
            return self.db.get_worker_steps(worker_id)
        except Exception as e:
            logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
            return ''
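
    # Illustrative sketch (not part of the original commit): recording a couple
    # of steps and reading the history back via log_step/get_steps above. The
    # step texts are placeholders.
    def _example_step_history(self, worker_id: str) -> str:
        self.log_step(worker_id, "resolved source URL")
        self.log_step(worker_id, "wrote metadata sidecar")
        return self.get_steps(worker_id)  # newline-joined step text from the DB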

    def start_auto_refresh(self) -> None:
        """Start the auto-refresh thread for periodic worker updates."""
        if self.auto_refresh_interval <= 0:
            logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
            return

        if self.refresh_thread and self.refresh_thread.is_alive():
            logger.debug("[WorkerManager] Auto-refresh already running")
            return

        logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
        self._stop_refresh = False
        self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
        self.refresh_thread.start()

    def stop_auto_refresh(self) -> None:
        """Stop the auto-refresh thread."""
        logger.info("[WorkerManager] Stopping auto-refresh")
        self._stop_refresh = True
        if self.refresh_thread:
            self.refresh_thread.join(timeout=5)
            self.refresh_thread = None

    def _start_refresh_if_needed(self) -> None:
        """Start auto-refresh if we have active workers and callbacks."""
        active = self.get_active_workers()
        if active and self.refresh_callbacks and not self._stop_refresh:
            self.start_auto_refresh()

    def _auto_refresh_loop(self) -> None:
        """Main auto-refresh loop that periodically queries and notifies."""
        try:
            while not self._stop_refresh:
                time.sleep(self.auto_refresh_interval)

                # Check if there are active workers
                active = self.get_active_workers()

                if not active:
                    # No more active workers, stop refreshing
                    logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
                    break

                # Call all registered callbacks with the active workers
                with self._lock:
                    for callback in self.refresh_callbacks:
                        try:
                            callback(active)
                        except Exception as e:
                            logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)

        except Exception as e:
            logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
        finally:
            logger.debug("[WorkerManager] Auto-refresh loop ended")
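
    # Illustrative sketch (not part of the original commit): wiring a refresh
    # callback into the auto-refresh loop above. The callback only logs the
    # active-worker count; a real caller would redraw its worker list instead.
    def _example_watch_workers(self) -> None:
        def on_refresh(active_workers: List[Dict[str, Any]]) -> None:
            logger.info(f"Active workers: {len(active_workers)}")
        self.add_refresh_callback(on_refresh)
        self.start_auto_refresh()  # polls every auto_refresh_interval seconds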

    def cleanup_old_workers(self, days: int = 7) -> int:
        """Clean up completed/errored workers older than specified days.

        Args:
            days: Delete workers completed more than this many days ago

        Returns:
            Number of workers deleted
        """
        try:
            count = self.db.cleanup_old_workers(days)
            if count > 0:
                logger.info(f"[WorkerManager] Cleaned up {count} old workers")
            return count
        except Exception as e:
            logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
            return 0

    def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
        """Append text to a worker's stdout log.

        Args:
            worker_id: Unique identifier for the worker
            text: Text to append
            channel: Logical channel (stdout, stderr, log, etc.)

        Returns:
            True if append was successful
        """
        try:
            step_label = self._get_last_step(worker_id)
            return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
        except Exception as e:
            logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
            return False

    def get_stdout(self, worker_id: str) -> str:
        """Get stdout logs for a worker.

        Args:
            worker_id: Unique identifier for the worker

        Returns:
            Worker's stdout or empty string
        """
        try:
            return self.db.get_worker_stdout(worker_id)
        except Exception as e:
            logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
            return ""
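
    # Illustrative sketch (not part of the original commit): pairing
    # append_stdout/get_stdout above to mirror one line of process output into
    # the worker record. `line` is a placeholder for real process output.
    def _example_mirror_output(self, worker_id: str, line: str) -> str:
        self.append_stdout(worker_id, line, channel="stdout")
        return self.get_stdout(worker_id)  # accumulated stdout text for the worker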

    def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
        """Compatibility wrapper for append_stdout."""
        return self.append_stdout(worker_id, text, channel=channel)

    def clear_stdout(self, worker_id: str) -> bool:
        """Clear stdout logs for a worker.

        Args:
            worker_id: Unique identifier for the worker

        Returns:
            True if clear was successful
        """
        try:
            return self.db.clear_worker_stdout(worker_id)
        except Exception as e:
            logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
            return False

    def close(self) -> None:
        """Close the worker manager and database connection."""
        self.stop_auto_refresh()
        self.db.close()
        logger.info("[WorkerManager] Closed")
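

# Illustrative sketch (not part of the original commit): a typical
# open/maintain/close cycle for the manager. The library path is a placeholder
# supplied by the caller.
def _example_maintenance(library_root: Path) -> None:
    manager = WorkerManager(library_root, auto_refresh_interval=0)  # 0 disables auto-refresh
    try:
        expired = manager.expire_running_workers(older_than_seconds=600, reason="stale after restart")
        removed = manager.cleanup_old_workers(days=14)
        logger.info(f"Expired {expired} stale workers, removed {removed} old rows")
    finally:
        manager.close()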