This commit is contained in:
nose
2025-12-11 19:04:02 -08:00
parent 6863c6c7ea
commit 16d8a763cd
103 changed files with 4759 additions and 9156 deletions

195
SYS/background_notifier.py Normal file

@@ -0,0 +1,195 @@
"""Lightweight console notifier for background WorkerManager tasks.
Registers a refresh callback on WorkerManager and prints concise updates when
workers start, progress, or finish. Intended for CLI background workflows.
Filters to show only workers related to the current pipeline session to avoid
cluttering the terminal with workers from previous sessions.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, Optional, Set
from SYS.logger import log, debug
class BackgroundNotifier:
"""Simple notifier that prints worker status changes for a session."""
def __init__(
self,
manager: Any,
output: Callable[[str], None] = log,
session_worker_ids: Optional[Set[str]] = None,
only_terminal_updates: bool = False,
overlay_mode: bool = False,
) -> None:
self.manager = manager
self.output = output
self.session_worker_ids = session_worker_ids if session_worker_ids is not None else set()
self.only_terminal_updates = only_terminal_updates
self.overlay_mode = overlay_mode
self._filter_enabled = session_worker_ids is not None
self._last_state: Dict[str, str] = {}
try:
self.manager.add_refresh_callback(self._on_refresh)
self.manager.start_auto_refresh()
except Exception as exc: # pragma: no cover - best effort
debug(f"[notifier] Could not attach refresh callback: {exc}")
def _render_line(self, worker: Dict[str, Any]) -> Optional[str]:
# Use worker_id (the actual worker ID we set) for filtering and display
worker_id = str(worker.get("worker_id") or "").strip()
if not worker_id:
# Fallback to database id if worker_id is not set
worker_id = str(worker.get("id") or "").strip()
if not worker_id:
return None
status = str(worker.get("status") or "running")
progress_val = worker.get("progress") or worker.get("progress_percent")
progress = ""
if isinstance(progress_val, (int, float)):
progress = f" {progress_val:.1f}%"
elif progress_val:
progress = f" {progress_val}"
step = str(worker.get("current_step") or worker.get("description") or "").strip()
parts = [f"[worker:{worker_id}] {status}{progress}"]
if step:
parts.append(step)
return " - ".join(parts)
def _on_refresh(self, workers: list[Dict[str, Any]]) -> None:
overlay_active_workers = 0
for worker in workers:
# Use worker_id (the actual worker ID we set) for filtering
worker_id = str(worker.get("worker_id") or "").strip()
if not worker_id:
# Fallback to database id if worker_id is not set
worker_id = str(worker.get("id") or "").strip()
if not worker_id:
continue
# If filtering is enabled, skip workers not in this session
if self._filter_enabled and worker_id not in self.session_worker_ids:
continue
status = str(worker.get("status") or "running")
# Overlay mode: only emit on completion; suppress start/progress spam
if self.overlay_mode:
if status in ("completed", "finished", "error"):
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
step = str(worker.get("current_step") or worker.get("description") or "").strip()
signature = f"{status}|{progress_val}|{step}"
if self._last_state.get(worker_id) == signature:
continue
self._last_state[worker_id] = signature
line = self._render_line(worker)
if line:
try:
self.output(line)
except Exception:
pass
self._last_state.pop(worker_id, None)
                    self.session_worker_ids.discard(worker_id)
                else:
                    # Still an active session worker; count it so the overlay below is not cleared.
                    overlay_active_workers += 1
                continue
# For terminal-only mode, emit once when the worker finishes and skip intermediate updates
if self.only_terminal_updates:
if status in ("completed", "finished", "error"):
if self._last_state.get(worker_id) == status:
continue
self._last_state[worker_id] = status
line = self._render_line(worker)
if line:
try:
self.output(line)
except Exception:
pass
# Stop tracking this worker after terminal notification
self.session_worker_ids.discard(worker_id)
continue
# Skip finished workers after showing them once (standard verbose mode)
if status in ("completed", "finished", "error"):
if worker_id in self._last_state:
# Already shown, remove from tracking
self._last_state.pop(worker_id, None)
self.session_worker_ids.discard(worker_id)
continue
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
step = str(worker.get("current_step") or worker.get("description") or "").strip()
signature = f"{status}|{progress_val}|{step}"
if self._last_state.get(worker_id) == signature:
continue
self._last_state[worker_id] = signature
line = self._render_line(worker)
if line:
try:
self.output(line)
except Exception:
pass
if self.overlay_mode:
try:
# If nothing active for this session, clear the overlay text
if overlay_active_workers == 0:
self.output("")
except Exception:
pass
def ensure_background_notifier(
manager: Any,
output: Callable[[str], None] = log,
session_worker_ids: Optional[Set[str]] = None,
only_terminal_updates: bool = False,
overlay_mode: bool = False,
) -> Optional[BackgroundNotifier]:
"""Attach a BackgroundNotifier to a WorkerManager if not already present.
Args:
manager: WorkerManager instance
output: Function to call for printing updates
        session_worker_ids: Set of worker IDs belonging to this pipeline session.
            If None, show all workers. If a set (even empty), only show workers in that set.
        only_terminal_updates: If True, emit a single line per worker on completion only.
        overlay_mode: If True, suppress start/progress lines and clear the overlay when idle.

    Returns:
        The attached BackgroundNotifier, or None if manager is None.
    """
if manager is None:
return None
existing = getattr(manager, "_background_notifier", None)
if isinstance(existing, BackgroundNotifier):
# Update session IDs if provided
if session_worker_ids is not None:
existing._filter_enabled = True
existing.session_worker_ids.update(session_worker_ids)
# Respect the most restrictive setting for terminal-only updates
if only_terminal_updates:
existing.only_terminal_updates = True
# Enable overlay mode if requested later
if overlay_mode:
existing.overlay_mode = True
return existing
notifier = BackgroundNotifier(
manager,
output,
session_worker_ids=session_worker_ids,
only_terminal_updates=only_terminal_updates,
overlay_mode=overlay_mode,
)
try:
manager._background_notifier = notifier # type: ignore[attr-defined]
except Exception:
pass
return notifier
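# Illustrative usage (a sketch; WorkerManager comes from elsewhere in the repo,
# and the worker IDs are hypothetical):
#   notifier = ensure_background_notifier(manager, session_worker_ids={"w1", "w2"})
#   ...
#   ensure_background_notifier(manager, session_worker_ids={"w3"})  # reuses notifier, merges IDs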

767
SYS/download.py Normal file

@@ -0,0 +1,767 @@
"""Download media files using yt-dlp with support for direct file downloads.
Lean, focused downloader without event infrastructure overhead.
- yt-dlp integration for streaming sites
- Direct file download fallback for PDFs, images, documents
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via SYS.logger.log()
"""
from __future__ import annotations
import glob # noqa: F401
import hashlib
import json
import random
import re
import string
import subprocess
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin, urlparse
import httpx
from SYS.logger import log, debug
from SYS.utils import ensure_directory, sha256_file
from API.HTTP import HTTPClient
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
try:
import yt_dlp # type: ignore
from yt_dlp.extractor import gen_extractors # type: ignore
except Exception as exc:
yt_dlp = None # type: ignore
YTDLP_IMPORT_ERROR = exc
else:
YTDLP_IMPORT_ERROR = None
try:
from metadata import extract_ytdlp_tags
except ImportError:
extract_ytdlp_tags = None
_EXTRACTOR_CACHE: List[Any] | None = None
def _ensure_yt_dlp_ready() -> None:
"""Verify yt-dlp is available, raise if not."""
if yt_dlp is not None:
return
detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
raise DownloadError(f"yt-dlp module not available: {detail}")
def _progress_callback(status: Dict[str, Any]) -> None:
"""Simple progress callback using logger."""
event = status.get("status")
if event == "downloading":
percent = status.get("_percent_str", "?")
speed = status.get("_speed_str", "?")
eta = status.get("_eta_str", "?")
sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta} ")
sys.stdout.flush()
elif event == "finished":
sys.stdout.write("\r" + " " * 70 + "\r")
sys.stdout.flush()
debug(f"✓ Download finished: {status.get('filename')}")
elif event in ("postprocessing", "processing"):
debug(f"Post-processing: {status.get('postprocessor')}")
def is_url_supported_by_ytdlp(url: str) -> bool:
"""Check if URL is supported by yt-dlp."""
if yt_dlp is None:
return False
global _EXTRACTOR_CACHE
if _EXTRACTOR_CACHE is None:
try:
_EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type]
except Exception:
_EXTRACTOR_CACHE = []
for extractor in _EXTRACTOR_CACHE:
try:
if not extractor.suitable(url):
continue
except Exception:
continue
name = getattr(extractor, "IE_NAME", "")
if name.lower() == "generic":
continue
return True
return False
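# e.g. is_url_supported_by_ytdlp("https://www.youtube.com/watch?v=dQw4w9WgXcQ") -> True
# URLs that only the "generic" extractor would match are deliberately treated as
# unsupported, so they fall through to the direct-file download path instead.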
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
"""Get list of available formats for a URL using yt-dlp."""
_ensure_yt_dlp_ready()
try:
ydl_opts = {
"quiet": True,
"no_warnings": True,
"socket_timeout": 30,
}
if no_playlist:
ydl_opts["noplaylist"] = True
if playlist_items:
ydl_opts["playlist_items"] = playlist_items
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
debug(f"Fetching format list for: {url}")
info = ydl.extract_info(url, download=False)
formats = info.get("formats", [])
if not formats:
log("No formats available", file=sys.stderr)
return None
result_formats = []
for fmt in formats:
result_formats.append({
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
})
debug(f"Found {len(result_formats)} available formats")
return result_formats
except Exception as e:
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
return None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
thinking sections are already downloaded. The title is extracted and stored in tags.
Returns:
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
return "", {}
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
title_from_first = None
# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
# Build unique output template for this section using session-based filename
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
output_dir_path = Path(base_outtmpl).parent
# Use session_id + section index for temp filename
# e.g., "/path/{session_id}_1.%(ext)s"
filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
filename_tmpl += ".%(ext)s"
# Use Path to handle separators correctly for the OS
section_outtmpl = str(output_dir_path / filename_tmpl)
# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
if ytdl_options.get("cookiefile"):
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
metadata_cmd.extend(["--cookies", cookies_path])
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)
try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
if not quiet:
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
if not quiet:
debug("Could not parse JSON metadata")
except Exception as e:
if not quiet:
debug(f"Error extracting metadata: {e}")
# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"]
# Add format
if ytdl_options.get("format"):
cmd.extend(["-f", ytdl_options["format"]])
# Add ONLY this section (not all sections)
cmd.extend(["--download-sections", section])
# Add force-keyframes-at-cuts if specified
if ytdl_options.get("force_keyframes_at_cuts"):
cmd.append("--force-keyframes-at-cuts")
# Add output template for this section
cmd.extend(["-o", section_outtmpl])
# Add cookies file if present
if ytdl_options.get("cookiefile"):
# Convert backslashes to forward slashes for better compatibility
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
cmd.extend(["--cookies", cookies_path])
# Add no-playlist if specified
if ytdl_options.get("noplaylist"):
cmd.append("--no-playlist")
# Add the URL
cmd.append(url)
if not quiet:
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
debug(f"Command: {' '.join(cmd)}")
# Run the subprocess - don't capture output so progress is shown
try:
result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
return session_id, first_section_info or {}
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
"""Build yt-dlp download options."""
ensure_directory(opts.output_dir)
# Build output template
    # When downloading sections, _download_with_sections_via_cli swaps this out for per-section "{session_id}_{n}.%(ext)s" templates
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
base_options: Dict[str, Any] = {
"outtmpl": outtmpl,
"quiet": True,
"no_warnings": True,
"noprogress": True,
"socket_timeout": 30,
"retries": 10,
"fragment_retries": 10,
"http_chunk_size": 10_485_760,
"restrictfilenames": True,
"progress_hooks": [] if opts.quiet else [_progress_callback],
}
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
# Check global cookies file lazily to avoid import cycles
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
base_options["cookiefile"] = global_cookies
else:
# Fallback to browser cookies
base_options["cookiesfrombrowser"] = ("chrome",)
# Add no-playlist option if specified (for single video from playlist url)
if opts.no_playlist:
base_options["noplaylist"] = True
# Configure based on mode
if opts.mode == "audio":
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
else: # video
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = [
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
]
# Add clip sections if provided (yt-dlp will download only these sections)
if opts.clip_sections:
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
sections = []
for section_range in opts.clip_sections.split(','):
try:
start_str, end_str = section_range.strip().split('-')
start_sec = float(start_str)
end_sec = float(end_str)
# Convert seconds to HH:MM:SS format
def sec_to_hhmmss(seconds):
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = int(seconds % 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
start_time = sec_to_hhmmss(start_sec)
end_time = sec_to_hhmmss(end_sec)
sections.append(f"*{start_time}-{end_time}")
except (ValueError, AttributeError):
pass
if sections:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
# Note: Not using --force-keyframes-at-cuts to avoid re-encoding
# This may result in less precise cuts but faster downloads
# Add playlist items selection if provided
if opts.playlist_items:
base_options["playlist_items"] = opts.playlist_items
if not opts.quiet:
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
return base_options
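# For reference, audio mode with defaults yields options roughly like (sketch only):
#   {"outtmpl": "<output_dir>/%(title)s.%(ext)s", "format": "251/140/bestaudio",
#    "postprocessors": [{"key": "FFmpegExtractAudio"}], "retries": 10, ...}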
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
"""Iterate through download entries, handling playlists."""
queue: List[Dict[str, Any]] = [info]
seen: set[int] = set()
while queue:
current = queue.pop(0)
obj_id = id(current)
if obj_id in seen:
continue
seen.add(obj_id)
entries = current.get("entries")
if isinstance(entries, list):
for entry in entries:
if isinstance(entry, dict):
queue.append(entry)
if current.get("requested_downloads") or not entries:
yield current
def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
"""Get candidate file paths for downloaded media."""
requested = entry.get("requested_downloads")
if isinstance(requested, list):
for item in requested:
if isinstance(item, dict):
for key in ("filepath", "_filename", "filename"):
value = item.get(key)
if value:
yield Path(value)
for key in ("filepath", "_filename", "filename"):
value = entry.get(key)
if value:
yield Path(value)
if entry.get("filename"):
yield output_dir / entry["filename"]
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
"""Find downloaded file in yt-dlp metadata."""
for entry in _iter_download_entries(info):
for candidate in _candidate_paths(entry, output_dir):
if candidate.is_file():
return entry, candidate
if not candidate.is_absolute():
resolved = output_dir / candidate
if resolved.is_file():
return entry, resolved
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
"""Extract SHA256 hash from yt-dlp metadata."""
for payload in [info] + info.get("entries", []):
if not isinstance(payload, dict):
continue
hashes = payload.get("hashes")
if isinstance(hashes, dict):
for key in ("sha256", "sha-256", "sha_256"):
value = hashes.get(key)
if isinstance(value, str) and value.strip():
return value.strip().lower()
for key in ("sha256", "sha-256", "sha_256"):
value = payload.get(key)
if isinstance(value, str) and value.strip():
return value.strip().lower()
return None
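# e.g. _extract_sha256({"hashes": {"sha256": "ABC..."}}) -> "abc..." (lowercased).
# Key lookups are case-sensitive, so {"hashes": {"SHA256": ...}} yields None.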
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
"""Extract the actual download link from LibGen redirect URL.
    LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to an
    actual mirror URL. This follows the redirect chain to get the real file.
Args:
libgen_url: LibGen file.php URL
Returns:
Actual download URL or None if extraction fails
"""
try:
import requests
from urllib.parse import urlparse
# Check if this is a LibGen URL
parsed = urlparse(libgen_url)
if 'libgen' not in parsed.netloc.lower():
return None
if '/file.php' not in parsed.path.lower():
return None
# LibGen redirects to actual mirrors, follow redirects to get final URL
session = requests.Session()
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
debug(f"Following LibGen redirect chain for: {libgen_url}")
# First, get the page and look for direct download link
try:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
# Try to find actual download link in the page
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Look for download links - LibGen typically has forms with download buttons
# Look for all links and forms that might lead to download
for link in soup.find_all('a'):
href = link.get('href')
if href and isinstance(href, str):
# Look for direct file links or get.php redirects
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
download_url = href if href.startswith('http') else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except ImportError:
pass # BeautifulSoup not available
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
return final_url
except requests.RequestException as e:
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
# Try head request as fallback
try:
response = session.head(libgen_url, allow_redirects=True, timeout=10)
if response.url != libgen_url:
debug(f"LibGen HEAD resolved to: {response.url}")
return response.url
        except Exception:
            pass
return None
except Exception as e:
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
return None
def _download_direct_file(
url: str,
output_dir: Path,
debug_logger: Optional[DebugLogger] = None,
quiet: bool = False,
) -> DownloadMediaResult:
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
ensure_directory(output_dir)
    from urllib.parse import unquote, urlparse, parse_qs
# Extract filename from URL
parsed_url = urlparse(url)
url_path = parsed_url.path
# Try to get filename from query parameters first (for LibGen and similar services)
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
filename = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ('filename', 'download', 'file', 'name'):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
break
# If not found in query params, extract from URL path
if not filename or not filename.strip():
filename = url_path.split("/")[-1] if url_path else ""
filename = unquote(filename)
# Remove query strings from filename if any
if "?" in filename:
filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request)
try:
with HTTPClient(timeout=10.0) as client:
response = client._request("HEAD", url, follow_redirects=True)
content_disposition = response.headers.get("content-disposition", "")
if content_disposition:
# Extract filename from Content-Disposition header
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
if match:
extracted_name = match.group(1) or match.group(2)
if extracted_name:
filename = unquote(extracted_name)
if not quiet:
debug(f"Filename from Content-Disposition: {filename}")
except Exception as e:
if not quiet:
log(f"Could not get filename from headers: {e}", file=sys.stderr)
# Fallback if we still don't have a good filename
if not filename or "." not in filename:
filename = "downloaded_file.bin"
file_path = output_dir / filename
progress_bar = ProgressBar()
if not quiet:
debug(f"Direct download: {filename}")
try:
start_time = time.time()
downloaded_bytes = [0]
total_bytes = [0]
last_progress_time = [start_time]
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
downloaded_bytes[0] = bytes_downloaded
total_bytes[0] = content_length
now = time.time()
if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
elapsed = now - start_time
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0
speed_str = progress_bar.format_bytes(speed) + "/s"
minutes, seconds = divmod(int(eta_seconds), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
progress_line = progress_bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=bytes_downloaded,
total=content_length,
speed_str=speed_str,
eta_str=eta_str,
)
if not quiet:
debug(progress_line)
last_progress_time[0] = now
with HTTPClient(timeout=30.0) as client:
client.download(url, str(file_path), progress_callback=progress_callback)
elapsed = time.time() - start_time
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
if not quiet:
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
# For direct file downloads, create minimal info dict without filename as title
# This prevents creating duplicate title: tags when filename gets auto-generated
# We'll add title back later only if we couldn't extract meaningful tags
info = {
"id": filename.rsplit(".", 1)[0],
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
"webpage_url": url,
}
hash_value = None
try:
hash_value = sha256_file(file_path)
except Exception:
pass
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
# Only use filename as a title tag if we couldn't extract any meaningful tags
# This prevents duplicate title: tags when the filename could be mistaken for metadata
if not any(t.startswith('title:') for t in tags):
# Re-extract tags with filename as title only if needed
info['title'] = filename
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as e:
log(f"Error extracting tags with filename: {e}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"direct-file-downloaded",
{"url": url, "path": str(file_path), "hash": hash_value},
)
return DownloadMediaResult(
path=file_path,
info=info,
tags=tags,
source_url=url,
hash_value=hash_value,
)
except (httpx.HTTPError, httpx.RequestError) as exc:
log(f"Download error: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{"phase": "direct-file", "url": url, "error": str(exc)},
)
raise DownloadError(f"Failed to download {url}: {exc}") from exc
except Exception as exc:
log(f"Error downloading file: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{
"phase": "direct-file",
"url": url,
"error": str(exc),
"traceback": traceback.format_exc(),
},
)
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
url: URL to probe
no_playlist: If True, ignore playlists and probe only the single video
timeout_seconds: Max seconds to wait for probe (default 15s)
Returns:
Dict with keys: extractor, title, entries (if playlist), duration, etc.
Returns None if not supported by yt-dlp or on timeout.
"""
if not is_url_supported_by_ytdlp(url):
return None
# Wrap probe in timeout to prevent hanging on large playlists
import threading
from typing import cast
result_container: List[Optional[Any]] = [None, None] # [result, error]
def _do_probe() -> None:
try:
_ensure_yt_dlp_ready()
assert yt_dlp is not None
# Extract info without downloading
# Use extract_flat='in_playlist' to get full metadata for playlist items
ydl_opts = {
"quiet": True, # Suppress all output
"no_warnings": True,
"socket_timeout": 10,
"retries": 2, # Reduce retries for faster timeout
"skip_download": True, # Don't actually download
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
"noprogress": True, # No progress bars
}
# Add cookies if available (lazy import to avoid circular dependency)
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
ydl_opts["cookiefile"] = global_cookies
# Add no_playlist option if specified
if no_playlist:
ydl_opts["noplaylist"] = True
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(url, download=False)
if not isinstance(info, dict):
result_container[0] = None
return
# Extract relevant fields
result_container[0] = {
"extractor": info.get("extractor", ""),
"title": info.get("title", ""),
"entries": info.get("entries", []), # Will be populated if playlist
"duration": info.get("duration"),
"uploader": info.get("uploader"),
"description": info.get("description"),
"url": url,
}
except Exception as exc:
log(f"Probe error for {url}: {exc}")
result_container[1] = exc
    # Daemon thread: a probe that hangs past the timeout must not block interpreter exit.
    thread = threading.Thread(target=_do_probe, daemon=True)
thread.start()
thread.join(timeout=timeout_seconds)
if thread.is_alive():
# Probe timed out - return None to fall back to direct download
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
return None
if result_container[1] is not None:
# Probe error - return None to proceed anyway
return None
return cast(Optional[Dict[str, Any]], result_container[0])
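# e.g. probe_url("https://www.youtube.com/watch?v=...", no_playlist=True) returns a
# dict shaped like {"extractor": "youtube", "title": "...", "entries": [], ...},
# or None on unsupported URLs, probe errors, or timeout.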
__all__ = [
"is_url_supported_by_ytdlp",
"list_formats",
"probe_url",
"DownloadError",
"DownloadOptions",
"DownloadMediaResult",
]

180
SYS/file_server.py Normal file

@@ -0,0 +1,180 @@
"""Simple HTTP file server for serving files in web mode."""
import threading
import socket
import logging
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path
from typing import Optional
import mimetypes
import urllib.parse
logger = logging.getLogger(__name__)
# Global server instance
_file_server: Optional[HTTPServer] = None
_server_thread: Optional[threading.Thread] = None
_server_port: int = 8001
class FileServerHandler(SimpleHTTPRequestHandler):
"""HTTP request handler for file serving."""
def do_GET(self):
"""Handle GET requests."""
# Parse the path
parsed_path = urllib.parse.urlparse(self.path)
file_path = urllib.parse.unquote(parsed_path.path)
# Remove leading slash
if file_path.startswith('/'):
file_path = file_path[1:]
# Decode the file path (it's URL encoded)
try:
full_path = Path(file_path).resolve()
# Security check: ensure the path is within allowed directories
# For now, allow all paths (can be restricted later)
            if full_path.is_file():
# Serve the file
logger.debug(f"Serving file: {full_path}")
# Determine content type
content_type, _ = mimetypes.guess_type(str(full_path))
if content_type is None:
content_type = 'application/octet-stream'
try:
with open(full_path, 'rb') as f:
file_content = f.read()
self.send_response(200)
self.send_header('Content-type', content_type)
self.send_header('Content-Length', str(len(file_content)))
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
self.end_headers()
self.wfile.write(file_content)
logger.info(f"Successfully served file: {full_path.name}")
return
except Exception as e:
logger.error(f"Error serving file: {e}")
self.send_error(500, "Internal server error")
return
else:
logger.warning(f"File not found: {full_path}")
self.send_error(404, "File not found")
return
except Exception as e:
logger.error(f"Error handling request: {e}")
self.send_error(400, "Bad request")
def log_message(self, format, *args):
"""Override to use our logger instead of stderr."""
logger.debug(format % args)
def get_local_ip() -> Optional[str]:
"""Get the local IP address that's accessible from other devices."""
try:
# Connect to a remote server to determine local IP
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
        finally:
            s.close()
except Exception as e:
logger.warning(f"Failed to determine local IP: {e}")
return None
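# Note: connect() on a UDP socket transmits nothing; it only selects the local
# interface that would route toward 8.8.8.8, which is the address reported above.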
def start_file_server(port: int = 8001) -> Optional[str]:
"""Start the HTTP file server.
Args:
port: Port to serve on
Returns:
Server URL if successful, None otherwise
"""
global _file_server, _server_thread, _server_port
if _file_server is not None:
logger.debug(f"File server already running on port {_server_port}")
local_ip = get_local_ip()
if local_ip:
return f"http://{local_ip}:{_server_port}"
return None
try:
_server_port = port
# Create server
server_address = ('', port)
_file_server = HTTPServer(server_address, FileServerHandler)
# Start in daemon thread
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
_server_thread.start()
logger.info(f"File server started on port {port}")
# Get local IP
local_ip = get_local_ip()
if local_ip:
server_url = f"http://{local_ip}:{port}"
logger.info(f"File server accessible at: {server_url}")
return server_url
else:
logger.warning("Could not determine local IP")
return None
except Exception as e:
logger.error(f"Failed to start file server: {e}")
_file_server = None
_server_thread = None
return None
def stop_file_server():
"""Stop the HTTP file server."""
global _file_server, _server_thread
if _file_server is not None:
try:
_file_server.shutdown()
_file_server.server_close()
logger.info("File server stopped")
except Exception as e:
logger.error(f"Error stopping file server: {e}")
finally:
_file_server = None
_server_thread = None
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
"""Get the HTTP URL for a file.
Args:
file_path: Path to the file
server_url: Base server URL (gets determined if None)
Returns:
HTTP URL to the file, or None if server not running
"""
if not file_path.exists():
logger.warning(f"File does not exist: {file_path}")
return None
if server_url is None:
local_ip = get_local_ip()
if not local_ip:
logger.error("Cannot determine local IP for file URL")
return None
server_url = f"http://{local_ip}:{_server_port}"
# URL encode the file path
encoded_path = urllib.parse.quote(str(file_path.resolve()))
return f"{server_url}/{encoded_path}"

104
SYS/logger.py Normal file

@@ -0,0 +1,104 @@
"""Unified logging utility for automatic file and function name tracking."""
import sys
import inspect
import threading
from pathlib import Path
_DEBUG_ENABLED = False
_thread_local = threading.local()
def set_thread_stream(stream):
"""Set a custom output stream for the current thread."""
_thread_local.stream = stream
def get_thread_stream():
"""Get the custom output stream for the current thread, if any."""
return getattr(_thread_local, 'stream', None)
def set_debug(enabled: bool) -> None:
"""Enable or disable debug logging."""
global _DEBUG_ENABLED
_DEBUG_ENABLED = enabled
def is_debug_enabled() -> bool:
"""Check if debug logging is enabled."""
return _DEBUG_ENABLED
def debug(*args, **kwargs) -> None:
"""Print debug message if debug logging is enabled.
Automatically prepends [filename.function_name] to all output.
"""
if not _DEBUG_ENABLED:
return
# Check if stderr has been redirected to /dev/null (quiet mode)
# If so, skip output to avoid queuing in background worker's capture
try:
stderr_name = getattr(sys.stderr, 'name', '')
if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
return
except Exception:
pass
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
# Set default to stderr for debug messages
elif 'file' not in kwargs:
kwargs['file'] = sys.stderr
# Prepend DEBUG label
args = ("DEBUG:", *args)
# Use the same logic as log()
log(*args, **kwargs)
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
Automatically prepends [filename.function_name] to all output.
Defaults to stdout if not specified.
Example:
log("Upload started") # Output: [add_file.run] Upload started
"""
# When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
add_prefix = _DEBUG_ENABLED
# Get the calling frame
frame = inspect.currentframe()
if frame is None:
print(*args, **kwargs)
return
    caller_frame = frame.f_back
    # Skip frames inside this module so debug() reports its real caller, not itself.
    logger_file = frame.f_code.co_filename
    while caller_frame is not None and caller_frame.f_code.co_filename == logger_file:
        caller_frame = caller_frame.f_back
    if caller_frame is None:
        print(*args, **kwargs)
        return
try:
# Get file name without extension
file_name = Path(caller_frame.f_code.co_filename).stem
# Get function name
func_name = caller_frame.f_code.co_name
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
# Set default to stdout if not specified
elif 'file' not in kwargs:
kwargs['file'] = sys.stdout
if add_prefix:
prefix = f"[{file_name}.{func_name}]"
print(prefix, *args, **kwargs)
else:
print(*args, **kwargs)
finally:
del frame
del caller_frame
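# Example: route one thread's output into a buffer (useful for worker capture):
#   import io
#   buf = io.StringIO()
#   set_thread_stream(buf)
#   log("captured")            # written to buf instead of stdout
#   set_thread_stream(None)    # restore default streams for this thread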

102
SYS/progress.py Normal file

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""Text-based progress bar utilities for consistent display across all downloads."""
import sys
from SYS.logger import log, debug
def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
"""Create a text-based progress bar.
Args:
current: Current progress (bytes/items)
total: Total to complete (bytes/items)
width: Width of the bar in characters (default 40)
label: Optional label prefix
Returns:
Formatted progress bar string
Examples:
format_progress_bar(50, 100)
# Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"
format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
# Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
"""
if total <= 0:
percentage = 0
filled = 0
else:
percentage = (current / total) * 100
filled = int((current / total) * width)
bar = "" * filled + "" * (width - filled)
pct_str = f"{percentage:.1f}%"
if label:
result = f"{label}: [{bar}] {pct_str}"
else:
result = f"[{bar}] {pct_str}"
return result
def format_size(bytes_val: float) -> str:
"""Format bytes to human-readable size."""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if bytes_val < 1024:
return f"{bytes_val:.2f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.2f} PB"
def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
"""Format download status with progress bar and details."""
bar = format_progress_bar(current, total, width=30)
size_current = format_size(current)
size_total = format_size(total)
if speed > 0:
speed_str = f" @ {format_size(speed)}/s"
else:
speed_str = ""
return f"{bar} ({size_current} / {size_total}{speed_str})"
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
"""Print download progress to stderr (doesn't interfere with piped output)."""
status = format_download_status(filename, current, total, speed)
debug(status, end=end, flush=True)
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
"""Print final progress line (100%) with time elapsed."""
bar = format_progress_bar(total, total, width=30)
size_str = format_size(total)
if elapsed < 60:
time_str = f"{elapsed:.1f}s"
elif elapsed < 3600:
minutes = elapsed / 60
time_str = f"{minutes:.1f}m"
else:
hours = elapsed / 3600
time_str = f"{hours:.2f}h"
debug(f"{bar} ({size_str}) - {time_str}")
if __name__ == "__main__":
import time
log("Progress Bar Demo:", file=sys.stderr)
for i in range(101):
print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
time.sleep(0.02)
print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
log()

155
SYS/tasks.py Normal file

@@ -0,0 +1,155 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
import os
import socket
import subprocess
import sys
from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == 'nt':
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, 'r+b', buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc: # Pipe busy
if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
else:
sock = socket.socket(socket.AF_UNIX)
while True:
try:
sock.connect(path)
return sock.makefile('r+b', buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc:
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
"""Create a helper function for sending script messages via IPC."""
if ipc is None:
def _noop(_event: str, _payload: dict) -> None:
return None
return _noop
lock = threading.Lock()
def _send(event: str, payload: dict) -> None:
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
encoded = message.encode('utf-8') + b'\n'
with lock:
try:
ipc.write(encoded)
ipc.flush()
except OSError:
pass
return _send
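# Each message written to the pipe is newline-terminated JSON of the form:
#   {"command": ["script-message", "<event>", "<payload as a JSON string>"]}
# which mpv dispatches to script-message handlers registered for <event>.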
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
for raw in stream:
yield raw.rstrip('\r\n')
def _run_task(args, parser) -> int:
if not args.command:
parser.error('run-task requires a command to execute (use "--" before the command).')
env = os.environ.copy()
for entry in args.env:
key, sep, value = entry.partition('=')
if not sep:
parser.error(f'Invalid environment variable definition: {entry!r}')
env[key] = value
command = list(args.command)
if command and command[0] == '--':
command.pop(0)
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
if not command:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': 'No command provided after separator',
})
log('[downlow.py] No command provided for run-task', file=sys.stderr)
return 1
if command and isinstance(command[0], str) and sys.executable:
first = command[0].lower()
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
command[0] = sys.executable
if os.environ.get('DOWNLOW_DEBUG'):
log(f"Launching command: {command}", file=sys.stderr)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'start',
'command': command,
'cwd': args.cwd or os.getcwd(),
})
try:
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=args.cwd or None,
env=env,
            text=True,
            bufsize=1,
)
except FileNotFoundError as exc:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': f'Executable not found: {exc.filename}',
})
log(f"{exc}", file=sys.stderr)
return 1
stdout_lines: list[str] = []
stderr_lines: list[str] = []
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
for line in iter_stream(stream):
sink.append(line)
notifier('downlow-task-event', {
'id': args.task_id,
'event': label,
'line': line,
})
threads = []
if process.stdout:
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
t_out.start()
threads.append(t_out)
if process.stderr:
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
t_err.start()
threads.append(t_err)
return_code = process.wait()
for t in threads:
t.join(timeout=0.1)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'exit',
'returncode': return_code,
'success': return_code == 0,
})
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
if stdout_lines:
log('\n'.join(stdout_lines))
if stderr_lines:
log('\n'.join(stderr_lines), file=sys.stderr)
return return_code

492
SYS/utils.py Normal file

@@ -0,0 +1,492 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations
import json
import hashlib
import ffmpeg
import base64
import logging
import time
from pathlib import Path
from typing import Any, Iterable
from datetime import datetime
from dataclasses import dataclass, field
from fnmatch import fnmatch
from urllib.parse import urlparse
import SYS.utils_constant
try:
import cbor2
except ImportError:
cbor2 = None # type: ignore
CHUNK_SIZE = 1024 * 1024 # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
"""Ensure *path* exists as a directory."""
try:
path.mkdir(parents=True, exist_ok=True)
except OSError as exc: # pragma: no cover - surfaced to caller
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
def unique_path(path: Path) -> Path:
"""Return a unique path by appending " (n)" if needed."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
candidate = parent / f"{stem} ({counter}){suffix}"
if not candidate.exists():
return candidate
counter += 1
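# e.g. unique_path(Path("out/video.mp4")) -> Path("out/video (1).mp4") when the
# original exists, then "out/video (2).mp4", and so on.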
def sanitize_metadata_value(value: Any) -> str | None:
if value is None:
return None
if not isinstance(value, str):
value = str(value)
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
if not value:
return None
return value
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
for value in values:
if value not in seen:
seen.add(value)
ordered.append(value)
return ordered
def sha256_file(file_path: Path) -> str:
"""Return the SHA-256 hex digest of *path*."""
hasher = hashlib.sha256()
with file_path.open('rb') as handle:
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
hasher.update(chunk)
return hasher.hexdigest()
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
"""Create a .metadata sidecar file with JSON metadata.
The metadata dict should contain title. If not present, it will be derived from
the filename. This ensures the .metadata file can be matched during batch import.
Args:
file_path: Path to the exported file
metadata: Dictionary of metadata to save
"""
if not metadata:
return
file_name = file_path.stem
file_ext = file_path.suffix.lower()
# Ensure metadata has a title field that matches the filename (without extension)
# This allows the sidecar to be matched and imported properly during batch import
if 'title' not in metadata or not metadata.get('title'):
metadata['title'] = file_name
metadata['hash'] = sha256_file(file_path)
metadata['size'] = Path(file_path).stat().st_size
format_found = False
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
for key, info in ext_map.items():
if info.get("ext") == file_ext:
metadata['type'] = mime_type
format_found = True
break
if format_found:
break
else:
metadata['type'] = 'unknown'
    try:
        metadata.update(ffprobe(str(file_path)))
    except Exception:
        # Non-media files (e.g. documents) have no streams ffprobe can parse; skip quietly.
        pass
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
try:
with open(metadata_path, 'w', encoding='utf-8') as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
except OSError as exc:
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
def create_tags_sidecar(file_path: Path, tags: set) -> None:
"""Create a .tags sidecar file with tags (one per line).
Args:
file_path: Path to the exported file
tags: Set of tag strings
"""
if not tags:
return
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
try:
with open(tags_path, 'w', encoding='utf-8') as f:
for tag in sorted(tags):
f.write(f"{tag}\n")
except Exception as e:
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
def ffprobe(file_path: str) -> dict:
probe = ffmpeg.probe(file_path)
metadata = {}
# Format-level info
fmt = probe.get("format", {})
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
metadata["format_name"] = fmt.get("format_name", None)
# Stream-level info
for stream in probe.get("streams", []):
codec_type = stream.get("codec_type")
if codec_type == "audio":
metadata["audio_codec"] = stream.get("codec_name")
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
elif codec_type == "video":
metadata["video_codec"] = stream.get("codec_name")
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
elif codec_type == "image":
metadata["image_codec"] = stream.get("codec_name")
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
return metadata
# ============================================================================
# CBOR Utilities - Consolidated from cbor.py
# ============================================================================
"""CBOR utilities backed by the `cbor2` library."""
def decode_cbor(data: bytes) -> Any:
"""Decode *data* from CBOR into native Python objects."""
if not data:
return None
if cbor2 is None:
raise ImportError("cbor2 library is required for CBOR decoding")
return cbor2.loads(data)
def jsonify(value: Any) -> Any:
"""Convert *value* into a JSON-friendly structure."""
if isinstance(value, dict):
return {str(key): jsonify(val) for key, val in value.items()}
if isinstance(value, list):
return [jsonify(item) for item in value]
if isinstance(value, bytes):
return {"__bytes__": base64.b64encode(value).decode("ascii")}
return value
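# e.g. jsonify({"k": b"\x00\x01"}) -> {"k": {"__bytes__": "AAE="}}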
# ============================================================================
# Format Utilities - Consolidated from format_utils.py
# ============================================================================
"""Formatting utilities for displaying metadata consistently across the application."""
def format_bytes(bytes_value) -> str:
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
Args:
bytes_value: Size in bytes (int or float)
Returns:
Formatted string like '1.5 MB' or '756 MB'
"""
    if bytes_value is None:
        return "0 B"
    if not isinstance(bytes_value, (int, float)):
        return str(bytes_value)
    if bytes_value <= 0:
        return "0 B"
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if bytes_value < 1024:
            if unit == "B":
                return f"{int(bytes_value)} {unit}"
            return f"{bytes_value:.1f} {unit}"
        bytes_value /= 1024
    return f"{bytes_value:.1f} PB"
def format_duration(seconds) -> str:
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string like '1:23:45' or '5:30'
"""
if seconds is None or seconds == '':
return "N/A"
if isinstance(seconds, str):
try:
seconds = float(seconds)
except ValueError:
return str(seconds)
if not isinstance(seconds, (int, float)):
return str(seconds)
total_seconds = int(seconds)
if total_seconds < 0:
return "N/A"
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
elif minutes > 0:
return f"{minutes}:{secs:02d}"
else:
return f"{secs}s"
def format_timestamp(timestamp_str) -> str:
"""Format ISO timestamp to readable format.
Args:
timestamp_str: ISO format timestamp string or None
Returns:
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
"""
if not timestamp_str:
return "N/A"
try:
# Handle ISO format timestamps
if isinstance(timestamp_str, str):
# Try parsing ISO format
if 'T' in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
else:
# Try other common formats
dt = datetime.fromisoformat(timestamp_str)
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception as e:
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
return str(timestamp_str)
def format_metadata_value(key: str, value) -> str:
"""Format a metadata value based on its key for display.
This is the central formatting rule for all metadata display.
Args:
key: Metadata field name
value: Value to format
Returns:
Formatted string for display
"""
if value is None or value == '':
return "N/A"
# Apply field-specific formatting
if key in ('size', 'file_size'):
return format_bytes(value)
elif key in ('duration', 'length'):
return format_duration(value)
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
return format_timestamp(value)
else:
return str(value)
# ============================================================================
# Link Utilities - Consolidated from link_utils.py
# ============================================================================
"""Link utilities - Extract and process url from various sources."""
def extract_link_from_args(args: Iterable[str]) -> Any | None:
"""Extract HTTP/HTTPS URL from command arguments.
Args:
args: Command arguments
Returns:
URL string if found, None otherwise
"""
    args_list = args if isinstance(args, (list, tuple)) else list(args)
    if not args_list:
return None
potential_link = str(args_list[0])
if potential_link.startswith(('http://', 'https://')):
return potential_link
return None
def extract_link_from_result(result: Any) -> Any | None:
"""Extract URL from a result object (dict or object with attributes).
Args:
result: Result object from pipeline (dict or object)
Returns:
URL string if found, None otherwise
"""
if isinstance(result, dict):
return result.get('url') or result.get('link') or result.get('href')
return (
getattr(result, 'url', None) or
getattr(result, 'link', None) or
getattr(result, 'href', None)
)
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
"""Extract link from args or result (args take priority).
Args:
result: Pipeline result object
args: Command arguments
Returns:
URL string if found, None otherwise
"""
# Try args first
link = extract_link_from_args(args)
if link:
return link
# Fall back to result
return extract_link_from_result(result)
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
"""Get API key from config with fallback support.
Args:
config: Configuration dictionary
service: Service name for logging
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
Returns:
API key if found and not empty, None otherwise
"""
try:
parts = key_path.split('.')
value = config
for part in parts:
if isinstance(value, dict):
value = value.get(part)
else:
return None
if isinstance(value, str):
return value.strip() or None
return None
except Exception:
return None
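# e.g. get_api_key({"Debrid": {"All-debrid": " KEY "}}, "alldebrid", "Debrid.All-debrid")
#   -> "KEY" (whitespace stripped; empty strings collapse to None)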
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
"""Add direct link information to result object.
Args:
result: Result object to modify (dict or object)
direct_link: The unlocked/direct URL
original_link: The original restricted URL
"""
if isinstance(result, dict):
result['direct_link'] = direct_link
result['original_link'] = original_link
else:
setattr(result, 'direct_link', direct_link)
setattr(result, 'original_link', original_link)
# ============================================================================
# URL Policy Resolution - Consolidated from url_parser.py
# ============================================================================
"""URL policy resolution for downlow workflows."""
@dataclass(slots=True)
class UrlPolicy:
"""Describe how a URL should be handled by download and screenshot flows."""
skip_download: bool = False
skip_metadata: bool = False
force_screenshot: bool = False
extra_tags: list[str] = field(default_factory=list)
def apply_tags(self, sources: Iterable[str]) -> list[str]:
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
for value in sources:
text = str(value).strip()
if text:
tags.append(text)
return tags
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
if not pattern:
return None
skip_download = bool(rule.get("skip_download"))
skip_metadata = bool(rule.get("skip_metadata"))
force_screenshot = bool(rule.get("force_screenshot"))
extra_tags_raw = rule.get("extra_tags")
if isinstance(extra_tags_raw, str):
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
elif isinstance(extra_tags_raw, (list, tuple, set)):
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
else:
extra_tags = []
return {
"pattern": pattern,
"skip_download": skip_download,
"skip_metadata": skip_metadata,
"force_screenshot": force_screenshot,
"extra_tags": extra_tags,
}
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
policies_raw = config.get("url_policies")
if not policies_raw:
return UrlPolicy()
if not isinstance(policies_raw, list):
return UrlPolicy()
parsed = urlparse(url)
subject = f"{parsed.netloc}{parsed.path}"
host = parsed.netloc
resolved = UrlPolicy()
for rule_raw in policies_raw:
if not isinstance(rule_raw, dict):
continue
rule = _normalise_rule(rule_raw)
if rule is None:
continue
pattern = rule["pattern"]
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
continue
if rule["skip_download"]:
resolved.skip_download = True
if rule["skip_metadata"]:
resolved.skip_metadata = True
if rule["force_screenshot"]:
resolved.force_screenshot = True
if rule["extra_tags"]:
for tag in rule["extra_tags"]:
if tag not in resolved.extra_tags:
resolved.extra_tags.append(tag)
return resolved
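# Worked example (a sketch; the rule values are hypothetical): patterns are
# fnmatch globs tested against both the bare host and host+path, and every
# matching rule contributes to the resolved policy.
#
#   _config = {"url_policies": [
#       {"pattern": "*.example.com", "skip_download": True},
#       {"pattern": "*/gallery/*", "force_screenshot": True,
#        "extra_tags": "gallery, auto"},
#   ]}
#   _p = resolve_url_policy(_config, "https://cdn.example.com/gallery/42")
#   # _p.skip_download and _p.force_screenshot are both True;
#   # _p.extra_tags == ["gallery", "auto"]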

100
SYS/utils_constant.py Normal file
View File

@@ -0,0 +1,100 @@
mime_maps = {
"image": {
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
"png": { "ext": ".png", "mimes": ["image/png"] },
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
},
"image_sequence": {
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
},
"video": {
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
},
"audio": {
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
},
"document": {
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
},
"archive": {
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
},
"project": {
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
},
"other": {
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
"json": { "ext": ".json", "mimes": ["application/json"] },
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
}
}
def get_type_from_ext(ext: str) -> str:
"""Determine the type (e.g., 'image', 'video', 'audio') from file extension.
Args:
ext: File extension (with or without leading dot, e.g., 'jpg' or '.jpg')
Returns:
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
"""
if not ext:
return 'other'
ext_clean = ext.lstrip('.').lower()
    # The format keys in each mime_maps category double as canonical
    # extensions, so a plain key lookup suffices here.
    for type_name, extensions_dict in mime_maps.items():
if ext_clean in extensions_dict:
return type_name
return 'other'
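# Quick checks (illustrative): the format keys double as extensions, and the
# per-format fields drive extension/MIME handling elsewhere.
#
#   get_type_from_ext(".webp")                    # -> "image"
#   get_type_from_ext("mkv")                      # -> "video"
#   get_type_from_ext("xyz")                      # -> "other"
#   mime_maps["video"]["mkv"]["audio_only_ext"]   # -> ".mka"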

671
SYS/worker_manager.py Normal file
View File

@@ -0,0 +1,671 @@
"""Worker task management with persistent database storage.
Manages worker tasks for downloads, searches, imports, etc. with automatic
persistence to database and optional auto-refresh callbacks.
"""
import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable
from datetime import datetime
from threading import Thread, Lock
import time
from API.folder import API_folder_store
from SYS.logger import log
logger = logging.getLogger(__name__)
class Worker:
"""Represents a single worker task with state management."""
def __init__(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", manager: Optional['WorkerManager'] = None):
"""Initialize a worker.
Args:
worker_id: Unique identifier for this worker
worker_type: Type of work (e.g., 'download', 'search', 'import')
title: Human-readable title
description: Detailed description
manager: Reference to parent WorkerManager for state updates
"""
self.id = worker_id
self.type = worker_type
self.title = title or worker_type
self.description = description
self.manager = manager
self.status = "running"
self.progress = ""
self.details = ""
self.error_message = ""
self.result = "pending"
self._stdout_buffer = []
self._steps_buffer = []
def log_step(self, step_text: str) -> None:
"""Log a step for this worker.
Args:
step_text: Text describing the step
"""
try:
if self.manager:
self.manager.log_step(self.id, step_text)
else:
logger.info(f"[{self.id}] {step_text}")
except Exception as e:
logger.error(f"Error logging step for worker {self.id}: {e}")
def append_stdout(self, text: str) -> None:
"""Append text to stdout log.
Args:
text: Text to append
"""
try:
if self.manager:
self.manager.append_worker_stdout(self.id, text)
else:
self._stdout_buffer.append(text)
except Exception as e:
logger.error(f"Error appending stdout for worker {self.id}: {e}")
def get_stdout(self) -> str:
"""Get all stdout for this worker.
Returns:
Complete stdout text
"""
try:
if self.manager:
return self.manager.get_stdout(self.id)
else:
return "\n".join(self._stdout_buffer)
except Exception as e:
logger.error(f"Error getting stdout for worker {self.id}: {e}")
return ""
def get_steps(self) -> str:
"""Get all steps for this worker.
Returns:
Complete steps text
"""
try:
if self.manager:
return self.manager.get_steps(self.id)
else:
return "\n".join(self._steps_buffer)
except Exception as e:
logger.error(f"Error getting steps for worker {self.id}: {e}")
return ""
def update_progress(self, progress: str = "", details: str = "") -> None:
"""Update worker progress.
Args:
progress: Progress string (e.g., "50%")
details: Additional details
"""
self.progress = progress
self.details = details
try:
if self.manager:
                # WorkerManager.update_worker expects numeric progress and a
                # `details` keyword; passing both positionally sent `details`
                # into `current_step` and a str into a float comparison.
                try:
                    numeric = float(str(progress).rstrip('%').strip() or 0)
                except ValueError:
                    numeric = 0.0
                self.manager.update_worker(self.id, progress=numeric, details=details)
except Exception as e:
logger.error(f"Error updating worker {self.id}: {e}")
def finish(self, result: str = "completed", message: str = "") -> None:
"""Mark worker as finished.
Args:
result: Result status ('completed', 'error', 'cancelled')
message: Result message/error details
"""
self.result = result
self.status = "finished"
self.error_message = message
try:
if self.manager:
# Flush and disable logging handler before marking finished
self.manager.disable_logging_for_worker(self.id)
# Then mark as finished in database
self.manager.finish_worker(self.id, result, message)
except Exception as e:
logger.error(f"Error finishing worker {self.id}: {e}")
class WorkerLoggingHandler(logging.StreamHandler):
"""Custom logging handler that captures logs for a worker."""
def __init__(self, worker_id: str, db: API_folder_store,
manager: Optional['WorkerManager'] = None,
buffer_size: int = 50):
"""Initialize the handler.
Args:
worker_id: ID of the worker to capture logs for
            db: API_folder_store used to persist captured logs
            manager: Optional parent WorkerManager; preferred write path when set
            buffer_size: Number of logs to buffer before flushing to DB
"""
super().__init__()
self.worker_id = worker_id
self.db = db
self.manager = manager
self.buffer_size = buffer_size
self.buffer = []
self._lock = Lock()
# Set a format that includes timestamp and level
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
self.setFormatter(formatter)
def emit(self, record):
"""Emit a log record."""
try:
# Try to format the record normally
try:
msg = self.format(record)
except (TypeError, ValueError):
# If formatting fails (e.g., %d format with non-int arg),
# build message manually without calling getMessage()
try:
# Try to format with args if possible
if record.args:
msg = record.msg % record.args
else:
msg = record.msg
except (TypeError, ValueError):
# If that fails too, just use the raw message string
msg = str(record.msg)
                # Add timestamp and level to mirror the formatter's output;
                # `time` is already imported at module level
                timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
with self._lock:
self.buffer.append(msg)
# Flush to DB when buffer reaches size
if len(self.buffer) >= self.buffer_size:
self._flush()
except Exception:
self.handleError(record)
def _flush(self):
"""Flush buffered logs to database."""
if self.buffer:
log_text = '\n'.join(self.buffer)
try:
if self.manager:
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
else:
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
except Exception as e:
# If we can't write to DB, at least log it
log(f"Error flushing worker logs: {e}")
self.buffer = []
def flush(self):
"""Flush any buffered records."""
with self._lock:
self._flush()
super().flush()
def close(self):
"""Close the handler."""
self.flush()
super().close()
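# Capture sketch (illustrative): WorkerManager attaches one handler per worker
# to the root logger, so anything logged while the task runs is buffered and
# flushed to that worker's log channel until the handler is detached.
#
#   handler = manager.enable_logging_for_worker("dl_001")
#   logging.getLogger("downloader").info("starting transfer")
#   manager.disable_logging_for_worker("dl_001")  # flush, close, detach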
class WorkerManager:
"""Manages persistent worker tasks with auto-refresh capability."""
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
"""Initialize the worker manager.
Args:
library_root: Root directory for the local library database
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
"""
self.library_root = Path(library_root)
self.db = API_folder_store(library_root)
self.auto_refresh_interval = auto_refresh_interval
self.refresh_callbacks: List[Callable] = []
self.refresh_thread: Optional[Thread] = None
self._stop_refresh = False
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str, str] = {}
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - close database."""
self.close()
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
"""Register a callback to be called on worker updates.
Args:
callback: Function that receives list of active workers
"""
with self._lock:
self.refresh_callbacks.append(callback)
def expire_running_workers(
self,
older_than_seconds: int = 300,
worker_id_prefix: Optional[str] = None,
reason: Optional[str] = None,
status: str = "error",
) -> int:
"""Mark stale running workers as finished.
Args:
older_than_seconds: Idle threshold before expiring.
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
reason: Error message if none already exists.
status: New status to apply.
Returns:
Count of workers updated.
"""
try:
return self.db.expire_running_workers(
older_than_seconds=older_than_seconds,
status=status,
reason=reason,
worker_id_prefix=worker_id_prefix,
)
except Exception as exc:
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
return 0
def remove_refresh_callback(self, callback: Callable) -> None:
"""Remove a refresh callback.
Args:
callback: The callback function to remove
"""
with self._lock:
if callback in self.refresh_callbacks:
self.refresh_callbacks.remove(callback)
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
"""Enable logging capture for a worker.
Creates a logging handler that captures all logs for this worker.
Args:
worker_id: ID of the worker to capture logs for
Returns:
The logging handler that was created, or None if there was an error
"""
try:
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
with self._lock:
self.worker_handlers[worker_id] = handler
# Add the handler to the root logger so it captures all logs
root_logger = logging.getLogger()
root_logger.addHandler(handler)
root_logger.setLevel(logging.DEBUG) # Capture all levels
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
return handler
except Exception as e:
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
return None
def disable_logging_for_worker(self, worker_id: str) -> None:
"""Disable logging capture for a worker and flush any pending logs.
Args:
worker_id: ID of the worker to stop capturing logs for
"""
try:
with self._lock:
handler = self.worker_handlers.pop(worker_id, None)
if handler:
# Flush and close the handler
handler.flush()
handler.close()
# Remove from root logger
root_logger = logging.getLogger()
root_logger.removeHandler(handler)
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
except Exception as e:
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", total_steps: int = 0,
pipe: Optional[str] = None) -> bool:
"""Start tracking a new worker.
Args:
worker_id: Unique identifier for the worker
worker_type: Type of worker (e.g., 'download', 'search', 'import')
title: Worker title/name
description: Worker description
total_steps: Total number of steps for progress tracking
pipe: Text of the originating pipe/prompt, if any
Returns:
True if worker was inserted successfully
"""
try:
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
if result > 0:
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
self._start_refresh_if_needed()
return True
return False
except Exception as e:
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
return False
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
details: str = "", error: str = "") -> bool:
"""Update worker progress and status.
Args:
worker_id: Unique identifier for the worker
progress: Progress percentage (0-100)
current_step: Current step description
details: Additional details
error: Error message if any
Returns:
True if update was successful
"""
try:
kwargs = {}
if progress > 0:
kwargs['progress'] = progress
if current_step:
kwargs['current_step'] = current_step
if details:
kwargs['description'] = details
if error:
kwargs['error_message'] = error
if kwargs:
kwargs['last_updated'] = datetime.now().isoformat()
if 'current_step' in kwargs and kwargs['current_step']:
self._worker_last_step[worker_id] = str(kwargs['current_step'])
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
return False
def finish_worker(self, worker_id: str, result: str = "completed",
error_msg: str = "", result_data: str = "") -> bool:
"""Mark a worker as finished.
Args:
worker_id: Unique identifier for the worker
result: Result status ('completed', 'error', 'cancelled')
error_msg: Error message if any
result_data: Result data as JSON string
Returns:
True if update was successful
"""
try:
kwargs = {
'status': result,
'completed_at': datetime.now().isoformat()
}
if error_msg:
kwargs['error_message'] = error_msg
if result_data:
kwargs['result_data'] = result_data
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
self._worker_last_step.pop(worker_id, None)
return success
except Exception as e:
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
return False
def get_active_workers(self) -> List[Dict[str, Any]]:
"""Get all active (running) workers.
Returns:
List of active worker dictionaries
"""
try:
return self.db.get_active_workers()
except Exception as e:
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
return []
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
"""Get all finished workers (completed, errored, or cancelled).
Args:
limit: Maximum number of workers to retrieve
Returns:
List of finished worker dictionaries
"""
try:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
return finished
except Exception as e:
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
return []
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
"""Get a specific worker's data.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker data or None if not found
"""
try:
return self.db.get_worker(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
return None
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
"""Fetch recorded worker timeline events."""
return self.db.get_worker_events(worker_id, limit)
def log_step(self, worker_id: str, step_text: str) -> bool:
"""Log a step to a worker's step history.
Args:
worker_id: Unique identifier for the worker
step_text: Step description to log
Returns:
True if successful
"""
try:
success = self.db.append_worker_steps(worker_id, step_text)
if success:
self._worker_last_step[worker_id] = step_text
return success
except Exception as e:
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
return False
def _get_last_step(self, worker_id: str) -> Optional[str]:
"""Return the most recent step description for a worker."""
return self._worker_last_step.get(worker_id)
def get_steps(self, worker_id: str) -> str:
"""Get step logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Steps text or empty string if not found
"""
try:
return self.db.get_worker_steps(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
return ''
def start_auto_refresh(self) -> None:
"""Start the auto-refresh thread for periodic worker updates."""
if self.auto_refresh_interval <= 0:
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
return
if self.refresh_thread and self.refresh_thread.is_alive():
logger.debug("[WorkerManager] Auto-refresh already running")
return
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
self._stop_refresh = False
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
def stop_auto_refresh(self) -> None:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
self._stop_refresh = True
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
def _start_refresh_if_needed(self) -> None:
"""Start auto-refresh if we have active workers and callbacks."""
active = self.get_active_workers()
if active and self.refresh_callbacks and not self._stop_refresh:
self.start_auto_refresh()
def _auto_refresh_loop(self) -> None:
"""Main auto-refresh loop that periodically queries and notifies."""
try:
while not self._stop_refresh:
time.sleep(self.auto_refresh_interval)
# Check if there are active workers
active = self.get_active_workers()
if not active:
# No more active workers, stop refreshing
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
break
                # Snapshot callbacks under the lock, then invoke them outside it
                # so a callback that adds/removes callbacks cannot deadlock here
                with self._lock:
                    callbacks = list(self.refresh_callbacks)
                for callback in callbacks:
                    try:
                        callback(active)
                    except Exception as e:
                        logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
except Exception as e:
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
finally:
logger.debug("[WorkerManager] Auto-refresh loop ended")
def cleanup_old_workers(self, days: int = 7) -> int:
"""Clean up completed/errored workers older than specified days.
Args:
days: Delete workers completed more than this many days ago
Returns:
Number of workers deleted
"""
try:
count = self.db.cleanup_old_workers(days)
if count > 0:
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
return count
except Exception as e:
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
return 0
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Append text to a worker's stdout log.
Args:
worker_id: Unique identifier for the worker
text: Text to append
channel: Logical channel (stdout, stderr, log, etc.)
Returns:
True if append was successful
"""
try:
step_label = self._get_last_step(worker_id)
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
return False
def get_stdout(self, worker_id: str) -> str:
"""Get stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker's stdout or empty string
"""
try:
return self.db.get_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
return ""
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Compatibility wrapper for append_stdout."""
return self.append_stdout(worker_id, text, channel=channel)
def clear_stdout(self, worker_id: str) -> bool:
"""Clear stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
True if clear was successful
"""
try:
return self.db.clear_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
return False
    def close(self) -> None:
        """Stop auto-refresh and close the database connection."""
        self.stop_auto_refresh()
        if self.db:
            try:
                self.db.close()
            except Exception:
                pass
        logger.info("[WorkerManager] Closed")