This commit is contained in:
nose
2025-11-25 20:09:33 -08:00
parent d75c644a82
commit bd69119996
80 changed files with 39615 additions and 0 deletions

42
.gitattributes vendored Normal file
View File

@@ -0,0 +1,42 @@
# Auto detect text files and normalize line endings to LF
* text=auto
# Python files
*.py text eol=lf
*.pyx text eol=lf
*.pyi text eol=lf
# Shell scripts
*.sh text eol=lf
*.bash text eol=lf
# Windows batch files
*.bat text eol=crlf
*.cmd text eol=crlf
*.ps1 text eol=crlf
# Config files
*.json text eol=lf
*.toml text eol=lf
*.yaml text eol=lf
*.yml text eol=lf
*.ini text eol=lf
*.cfg text eol=lf
# Documentation
*.md text eol=lf
README text eol=lf
LICENSE text eol=lf
# Binary files
*.db binary
*.sqlite binary
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.webp binary
*.mov binary
*.mp4 binary
*.webm binary
*.pdf binary

219
.gitignore copy Normal file
View File

@@ -0,0 +1,219 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
config.json
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
config.json
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# ---> Lua
# Compiled Lua sources
luac.out
# luarocks build files
*.src.rock
*.zip
*.tar.gz
# Object files
*.o
*.os
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
*.def
*.exp
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex

1789
CLI.py Normal file

File diff suppressed because it is too large Load Diff

64
README copy.md Normal file
View File

@@ -0,0 +1,64 @@
# Medeia-Macina
A powerful CLI media management and search platform integrating local files, Hydrus, torrents, books, and P2P networks.
## Key Features
* **Unified Search**: Search across Local, Hydrus, LibGen, Soulseek, and Debrid.
* **Pipeline Architecture**: Chain commands PowerShell-style (e.g., `search | filter | download`).
* **Smart Selection**: Use `@N` syntax to interact with results.
* **Metadata Management**: Tagging, notes, and relationships.
## Installation
1. Install Python 3.9+ and [Deno](https://deno.com/) (for YouTube support).
2. Install dependencies: `pip install -r requirements.txt`
3. Run the CLI: `python CLI.py`
## Command Examples
### Search & Download
```powershell
# Search and download the first result
search-file "daughter" | @1 | download-data
# Search specific provider and download
search-file -provider libgen "dune" | @1 | download-data
# Download YouTube video (auto-probes formats)
download-data "https://youtube.com/watch?v=..."
# Select format #2 from the list
@2 | download-data
```
### File Management
```powershell
# Add file to Hydrus
add-file -path "C:\Videos\movie.mp4" -storage hydrus
# Upload to 0x0.st and associate URL with Hydrus file
search-file "my_video" | @1 | add-file -provider 0x0
# Add tags to a file
search-file "video" | @1 | add-tag "creator:someone, character:hero"
# Use tag lists (from helper/adjective.json)
@1 | add-tag "{gnostic}"
```
### Metadata & Notes
```powershell
# Add a note
search-file "doc" | @1 | add-note "comment" "This is important"
# Get tags
search-file "image" | @1 | get-tag
```
### Pipeline Syntax
* `|` : Pipe results from one command to another.
* `@N` : Select the Nth item from the previous result (e.g., `@1`).
* `@N-M` : Select a range (e.g., `@1-5`).
* `@{1,3,5}` : Select specific items.
* `@*` : Select all items.
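These selectors chain the same way as the single-item `@N` shown above. A hypothetical session (the query and tag are placeholders):
```powershell
# Download results 1 through 3 from the previous search
search-file "dune" | @1-3 | download-data
# Tag two specific results from the previous list
@{2,4} | add-tag "creator:someone"
# Get tags for every result
@* | get-tag
```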
## Configuration
Edit `config.json` to set API keys (AllDebrid, OpenAI), storage paths, and Hydrus credentials.
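A minimal sketch of one possible layout. The top-level key names here are illustrative assumptions, not the project's confirmed schema; the Hydrus `home` instance with `url`/`key` mirrors what the export modal reads via `get_hydrus_instance`:
```json
{
  "AllDebrid": { "api_key": "<alldebrid-api-key>" },
  "OpenAI": { "api_key": "<openai-api-key>" },
  "storage": { "local": "D:/Media" },
  "Hydrus": {
    "home": { "url": "http://127.0.0.1:45869", "key": "<hydrus-access-key>" }
  }
}
```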

1
TUI/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Medeia-Macina TUI - Terminal User Interface."""

105
TUI/menu_actions.py Normal file
View File

@@ -0,0 +1,105 @@
"""Utilities that drive the modern Textual UI menus and presets."""
from __future__ import annotations
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (ROOT_DIR, BASE_DIR):
str_path = str(path)
if str_path not in sys.path:
sys.path.insert(0, str_path)
import metadata
@dataclass(slots=True)
class PipelinePreset:
"""Simple descriptor for a reusable pipeline."""
label: str
description: str
pipeline: str
PIPELINE_PRESETS: List[PipelinePreset] = [
PipelinePreset(
label="Download → Merge → Local",
description="Use download-data with playlist auto-selection, merge the pieces, tag, then import into local storage.",
pipeline='download-data "<url>" | merge-file | add-tag | add-file -storage local',
),
PipelinePreset(
label="Download → Hydrus",
description="Fetch media, auto-tag, and push directly into Hydrus.",
pipeline='download-data "<url>" | merge-file | add-tag | add-file -storage hydrus',
),
PipelinePreset(
label="Search Local Library",
description="Run search-file against the local library and emit a result table for further piping.",
pipeline='search-file -library local -query "<keywords>"',
),
]
def load_tags(file_path: Path) -> List[str]:
"""Read tags for a file using metadata.py as the single source of truth."""
try:
return metadata.read_tags_from_file(file_path)
except Exception:
return []
def group_tags_by_namespace(tags: Sequence[str]) -> Dict[str, List[str]]:
"""Return tags grouped by namespace for quick UI summaries."""
grouped: Dict[str, List[str]] = {}
for tag in metadata.normalize_tags(list(tags)):
namespace, value = metadata.split_tag(tag)
key = namespace or "_untagged"
grouped.setdefault(key, []).append(value)
for items in grouped.values():
items.sort()
return grouped
def build_metadata_snapshot(file_path: Path) -> Dict[str, Any]:
"""Load any available sidecar metadata for the selected file."""
snapshot: Dict[str, Any] = {
"file": str(file_path),
"tags": group_tags_by_namespace(load_tags(file_path)),
}
try:
sidecar = metadata._derive_sidecar_path(file_path)
if sidecar.is_file():
title, tags, notes = metadata._read_sidecar_metadata(sidecar)
snapshot["sidecar"] = {
"title": title,
"tags": group_tags_by_namespace(tags),
"notes": notes,
}
except Exception:
snapshot["sidecar"] = None
return snapshot
def summarize_result(result: Dict[str, Any]) -> str:
"""Build a one-line summary for a pipeline result row."""
title = result.get("title") or result.get("identifier") or result.get("file_path")
source = result.get("source") or result.get("cmdlet") or "result"
return f"{source}: {title}" if title else source
def normalize_tags(tags: Iterable[str]) -> List[str]:
"""Expose metadata.normalize_tags for callers that imported the old helper."""
return metadata.normalize_tags(list(tags))

7
TUI/modalscreen/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
"""Modal screens for the Downlow Hub UI application."""
from .export import ExportModal
from .search import SearchModal
from .workers import WorkersModal
__all__ = ["ExportModal", "SearchModal", "WorkersModal"]

139
TUI/modalscreen/access.py Normal file
View File

@@ -0,0 +1,139 @@
"""Modal for displaying files/URLs to access in web mode."""
from textual.screen import ModalScreen
from textual.containers import Container, Vertical, Horizontal
from textual.widgets import Static, Button, Label
from textual.app import ComposeResult
import logging
logger = logging.getLogger(__name__)
class AccessModal(ModalScreen):
"""Modal to display a file/URL that can be accessed from phone browser."""
CSS = """
Screen {
align: center middle;
}
#access-container {
width: 80;
height: auto;
border: thick $primary;
background: $surface;
}
#access-header {
dock: top;
height: 3;
background: $boost;
border-bottom: solid $accent;
content-align: center middle;
}
#access-content {
height: auto;
width: 1fr;
padding: 1 2;
border-bottom: solid $accent;
}
#access-footer {
dock: bottom;
height: 3;
background: $boost;
border-top: solid $accent;
align: center middle;
}
.access-url {
width: 1fr;
height: auto;
margin-bottom: 1;
border: solid $accent;
padding: 1;
}
.access-label {
width: 1fr;
height: auto;
margin-bottom: 1;
}
Button {
margin-right: 1;
}
"""
def __init__(self, title: str, content: str, is_url: bool = False):
"""Initialize access modal.
Args:
title: Title of the item being accessed
content: The URL or file path
is_url: Whether this is a URL (True) or file path (False)
"""
super().__init__()
self.item_title = title
self.item_content = content
self.is_url = is_url
def compose(self) -> ComposeResult:
"""Create the modal layout."""
with Container(id="access-container"):
with Vertical(id="access-header"):
yield Label(f"[bold]{self.item_title}[/bold]")
yield Label("[dim]Click link below to open in your browser[/dim]")
with Vertical(id="access-content"):
if self.is_url:
yield Label("[bold cyan]Link:[/bold cyan]", classes="access-label")
else:
yield Label("[bold cyan]File:[/bold cyan]", classes="access-label")
# Display as clickable link using HTML link element for web mode
# Rich link markup `[link=URL]` has parsing issues with URLs containing special chars
# Instead, use the HTML link markup that Textual-serve renders as <a> tag
# Format: [link=URL "tooltip"]text[/link] - the quotes help with parsing
link_text = f'[link="{self.item_content}"]Open in Browser[/link]'
content_box = Static(link_text, classes="access-url")
yield content_box
# Also show the URL for reference/copying
yield Label(self.item_content, classes="access-label")
yield Label("\n[yellow]↑ Click the link above to open on your device[/yellow]", classes="access-label")
with Horizontal(id="access-footer"):
yield Button("Copy URL", id="copy-btn", variant="primary")
yield Button("Close", id="close-btn", variant="default")
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
if event.button.id == "copy-btn":
# Copy to clipboard (optional - not critical if fails)
logger.info(f"Attempting to copy: {self.item_content}")
try:
# Try to use pyperclip if available
try:
import pyperclip
pyperclip.copy(self.item_content)
logger.info("URL copied to clipboard via pyperclip")
except ImportError:
# Fallback: pbcopy on macOS, xclip on Linux
import subprocess
import sys
if sys.platform == "win32":
# Windows: use clipboard via pyperclip (already tried)
logger.debug("Windows clipboard not available without pyperclip")
elif sys.platform == "darwin":
# macOS
process = subprocess.Popen(['pbcopy'], stdin=subprocess.PIPE)
process.communicate(self.item_content.encode('utf-8'))
logger.info("URL copied to clipboard via pbcopy")
else:
# Linux
process = subprocess.Popen(['xclip', '-selection', 'clipboard'], stdin=subprocess.PIPE)
process.communicate(self.item_content.encode('utf-8'))
logger.info("URL copied to clipboard via xclip")
except Exception as e:
logger.debug(f"Clipboard copy not available: {e}")
# Not critical - just informational
elif event.button.id == "close-btn":
self.dismiss()

1880
TUI/modalscreen/download.py Normal file

File diff suppressed because it is too large Load Diff

183
TUI/modalscreen/download.tcss Normal file
View File

@@ -0,0 +1,183 @@
/* Download Modal Screen Stylesheet */
Screen {
background: $surface;
overlay: screen;
}
#download_modal {
width: 100%;
height: 100%;
border: heavy $primary;
background: $boost;
}
#download_title {
dock: top;
height: 1;
content-align: center middle;
background: $primary;
color: $text;
text-style: bold;
padding: 0 1;
}
/* Main horizontal layout: 2 columns left/right split */
#main_layout {
width: 1fr;
height: 1fr;
layout: horizontal;
padding: 1;
border: none;
}
/* Left column */
#left_column {
width: 2fr;
height: 1fr;
layout: vertical;
}
/* Right column */
#right_column {
width: 1fr;
height: 1fr;
layout: vertical;
}
/* All containers styling */
.grid_container {
width: 1fr;
height: 1fr;
padding: 1;
layout: vertical;
margin: 0 0 1 0;
}
#tags_container {
border: mediumpurple;
}
#url_container {
border: solid $accent;
}
#files_container {
border: solid $accent;
}
#playlist_container {
border: solid $accent;
layout: vertical;
height: 0;
}
#playlist_tree {
width: 1fr;
height: auto;
border: none;
padding: 0;
}
#playlist_input {
width: 1fr;
height: 1;
border: none;
padding: 0 1;
margin: 1 0 0 0;
}
#playlist_input_row {
width: 1fr;
height: auto;
layout: horizontal;
margin: 1 0 0 0;
}
.section_title {
width: 1fr;
height: 1;
text-align: left;
color: $text-muted;
text-style: bold;
margin: 0 0 0 0;
padding: 0;
}
/* TextArea widgets in containers */
#tags_textarea {
width: 1fr;
height: 1fr;
border: none;
padding: 0;
}
#paragraph_textarea {
width: 1fr;
height: 1fr;
border: none;
padding: 0;
}
/* Select widgets in containers */
#files_select {
width: 1fr;
height: 1fr;
border: none;
}
/* Footer layout - horizontal: checkboxes left, source middle, buttons right */
#footer_layout {
width: 1fr;
height: auto;
layout: horizontal;
padding: 1;
margin: 0;
background: $boost;
}
#checkbox_row {
width: auto;
height: auto;
layout: horizontal;
align: left middle;
}
#source_select {
width: 30;
height: 1;
border: none;
padding: 0 1;
margin: 0;
}
#button_row {
width: auto;
height: auto;
layout: horizontal;
align: right middle;
}
/* Progress bar - shown during download */
#progress_bar {
width: 1fr;
height: 0;
}
/* Checkbox and Button styling */
Checkbox {
margin: 0 2 0 0;
}
Button {
margin: 0 1 0 0;
width: 12;
}
#cancel_btn {
width: 12;
}
#submit_btn {
width: 12;
}

512
TUI/modalscreen/export.py Normal file
View File

@@ -0,0 +1,512 @@
"""Export modal screen for exporting files with metadata."""
from textual.app import ComposeResult
from textual.screen import ModalScreen
from textual.containers import Container, Horizontal, Vertical
from textual.widgets import Static, Button, Input, TextArea, Tree, Select
from textual.binding import Binding
import logging
from typing import Optional, Any
from pathlib import Path
import json
import sys
import subprocess
from datetime import datetime
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from helper.utils import format_metadata_value
from config import load_config
logger = logging.getLogger(__name__)
class ExportModal(ModalScreen):
"""Modal screen for exporting files with metadata and tags."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
]
CSS_PATH = "export.tcss"
def __init__(self, result_data: Optional[dict] = None, hydrus_available: bool = False, debrid_available: bool = False):
"""Initialize the export modal with result data.
Args:
result_data: Dictionary containing:
- title: str - Item title
- tags: str - Comma-separated tags
- metadata: dict - File metadata (source-specific from item.metadata or local DB)
- source: str - Source identifier ('local', 'hydrus', 'debrid', etc)
- current_result: object - The full search result object
hydrus_available: bool - Whether Hydrus API is available
debrid_available: bool - Whether Debrid API is available
"""
super().__init__()
self.result_data = result_data or {}
self.hydrus_available = hydrus_available
self.debrid_available = debrid_available
self.metadata_display: Optional[Static] = None
self.tags_textarea: Optional[TextArea] = None
self.export_to_select: Optional[Select] = None
self.custom_path_input: Optional[Input] = None
self.libraries_select: Optional[Select] = None
self.size_input: Optional[Input] = None
self.format_select: Optional[Select] = None
self.file_ext: Optional[str] = None # Store the file extension for format filtering
self.file_type: Optional[str] = None # Store the file type (audio, video, image, document)
self.default_format: Optional[str] = None # Store the default format to set after mount
def _determine_file_type(self, ext: str) -> tuple[str, list]:
"""Determine file type from extension and return type and format options.
Args:
ext: File extension (e.g., '.mp3', '.mp4', '.jpg')
Returns:
Tuple of (file_type, format_options) where format_options is a list of (label, value) tuples
"""
ext_lower = ext.lower() if ext else ''
# Audio formats
audio_exts = {'.mp3', '.flac', '.wav', '.aac', '.ogg', '.m4a', '.wma', '.opus', '.mka'}
audio_formats = [("MKA", "mka"), ("MP3", "mp3"), ("M4A", "m4a"), ("FLAC", "flac"), ("WAV", "wav"), ("AAC", "aac"), ("OGG", "ogg"), ("Opus", "opus")]
# Video formats (can have audio too)
video_exts = {'.mp4', '.mkv', '.webm', '.avi', '.mov', '.flv', '.wmv', '.m4v', '.ts', '.mpg', '.mpeg'}
video_formats = [("MP4", "mp4"), ("MKV", "mkv"), ("WebM", "webm"), ("AVI", "avi"), ("MOV", "mov")]
# Image formats
image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico'}
image_formats = [("JPG", "jpg"), ("PNG", "png"), ("WebP", "webp"), ("GIF", "gif"), ("BMP", "bmp")]
# Document formats - no conversion for now
document_exts = {'.pdf', '.epub', '.txt', '.docx', '.doc', '.rtf', '.md', '.html', '.mobi', '.cbz', '.cbr'}
document_formats = []
if ext_lower in audio_exts:
return ('audio', audio_formats)
elif ext_lower in video_exts:
return ('video', video_formats)
elif ext_lower in image_exts:
return ('image', image_formats)
elif ext_lower in document_exts:
return ('document', document_formats)
else:
# Default to audio if unknown
return ('unknown', audio_formats)
def _get_library_options(self) -> list:
"""Get available library options from config.json."""
options = [("Local", "local")]
try:
# Try to load config
config_path = Path(__file__).parent.parent / "config.json"
if not config_path.exists():
return options
with open(config_path, 'r') as f:
config = json.load(f)
# Check if Hydrus is configured AND available (supports both new and old format)
from config import get_hydrus_instance
hydrus_instance = get_hydrus_instance(config, "home")
if self.hydrus_available and hydrus_instance and hydrus_instance.get("key") and hydrus_instance.get("url"):
options.append(("Hydrus Network", "hydrus"))
# Check if Debrid is configured AND available (supports both new and old format)
from config import get_debrid_api_key
debrid_api_key = get_debrid_api_key(config)
if self.debrid_available and debrid_api_key:
options.append(("Debrid", "debrid"))
except Exception as e:
logger.error(f"Error loading config for libraries: {e}")
return options
def _get_metadata_text(self) -> str:
"""Format metadata from result data in a consistent display format."""
metadata = self.result_data.get('metadata', {})
source = self.result_data.get('source', 'unknown')
logger.info(f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}")
if not metadata:
logger.info(f"_get_metadata_text - No metadata found, returning 'No metadata available'")
return "No metadata available"
lines = []
# Only display these specific fields in this order
display_fields = [
'duration', 'size', 'ext', 'media_type', 'time_imported', 'time_modified', 'hash'
]
# Display fields in a consistent order
for field in display_fields:
if field in metadata:
value = metadata[field]
# Skip complex types and None values
if isinstance(value, (dict, list)) or value is None:
continue
# Use central formatting rule
formatted_value = format_metadata_value(field, value)
# Format: "Field Name: value"
field_label = field.replace('_', ' ').title()
lines.append(f"{field_label}: {formatted_value}")
# If we found any fields, display them
if lines:
logger.info(f"_get_metadata_text - Returning {len(lines)} formatted metadata lines")
return "\n".join(lines)
else:
logger.info(f"_get_metadata_text - No matching fields found in metadata")
return "No metadata available"
def compose(self) -> ComposeResult:
"""Compose the export modal screen."""
with Container(id="export-container"):
yield Static("Export File with Metadata", id="export-title")
# Row 1: Three columns (Tags, Metadata, Export-To Options)
self.tags_textarea = TextArea(
text=self._format_tags(),
id="tags-area",
read_only=False,
)
yield self.tags_textarea
self.tags_textarea.border_title = "Tags"
# Metadata display instead of files tree
self.metadata_display = Static(
self._get_metadata_text(),
id="metadata-display",
)
yield self.metadata_display
self.metadata_display.border = ("solid", "dodgerblue")
# Right column: Export options
with Vertical(id="export-options"):
# Export To selector
self.export_to_select = Select(
[("0x0", "0x0"), ("Libraries", "libraries"), ("Custom Path", "path")],
id="export-to-select"
)
yield self.export_to_select
# Libraries selector (initially hidden)
library_options = self._get_library_options()
self.libraries_select = Select(
library_options,
id="libraries-select"
)
yield self.libraries_select
# Custom path input (initially hidden)
self.custom_path_input = Input(
placeholder="Enter custom export path",
id="custom-path-input"
)
yield self.custom_path_input
# Get metadata for size and format options
metadata = self.result_data.get('metadata', {})
original_size = metadata.get('size', '')
ext = metadata.get('ext', '')
# Store the extension and determine file type
self.file_ext = ext
self.file_type, format_options = self._determine_file_type(ext)
# Format size in MB for display
if original_size:
size_mb = int(original_size / (1024 * 1024)) if isinstance(original_size, (int, float)) else original_size
size_display = f"{size_mb}Mb"
else:
size_display = ""
# Size input
self.size_input = Input(
value=size_display,
placeholder="Size (can reduce)",
id="size-input",
disabled=(self.file_type == 'document') # Disable for documents - no resizing needed
)
yield self.size_input
# Determine the default format value (match current extension to format options)
default_format = None
if ext and format_options:
# Map extension to format value (e.g., .flac -> "flac", .mp3 -> "mp3", .m4a -> "m4a")
ext_lower = ext.lower().lstrip('.') # Remove leading dot if present
# Try to find matching format option
for _, value in format_options:
if value and (ext_lower == value or f".{ext_lower}" == ext or ext.endswith(f".{value}")):
default_format = value
logger.debug(f"Matched extension {ext} to format {value}")
break
# If no exact match, use first option
if not default_format and format_options:
default_format = format_options[0][1]
logger.debug(f"No format match for {ext}, using first option: {default_format}")
# Store the default format to apply after mount
self.default_format = default_format
# Format selector based on file type
self.format_select = Select(
format_options if format_options else [("No conversion", "")],
id="format-select",
disabled=not format_options # Disable if no format options (e.g., documents)
)
yield self.format_select
# Row 2: Buttons
with Horizontal(id="export-buttons"):
yield Button("Cancel", id="cancel-btn", variant="default")
yield Button("Export", id="export-btn", variant="primary")
def _format_tags(self) -> str:
"""Format tags from result data."""
tags = self.result_data.get('tags', '')
if isinstance(tags, str):
# Split by comma and rejoin with newlines
tags_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
return '\n'.join(tags_list)
elif isinstance(tags, list):
return '\n'.join(tags)
return ''
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button press events."""
button_id = event.button.id
if button_id == "export-btn":
self._handle_export()
elif button_id == "cancel-btn":
self.action_cancel()
def on_select_changed(self, event: Select.Changed) -> None:
"""Handle select widget changes."""
if event.control.id == "export-to-select":
# Show/hide custom path and libraries based on selection
if self.custom_path_input:
self.custom_path_input.display = (event.value == "path")
if self.libraries_select:
self.libraries_select.display = (event.value == "libraries")
elif event.control.id == "libraries-select":
# Handle library selection (no special action needed currently)
logger.debug(f"Library selected: {event.value}")
def on_mount(self) -> None:
"""Handle mount event."""
# Initially hide custom path and libraries inputs (default is "0x0")
if self.custom_path_input:
self.custom_path_input.display = False
if self.libraries_select:
self.libraries_select.display = False
# Set the default format value to show it selected instead of "Select"
if self.default_format and self.format_select:
self.format_select.value = self.default_format
logger.debug(f"Set format selector to default value: {self.default_format}")
# Refresh metadata display after mount to ensure data is loaded
if self.metadata_display:
metadata_text = self._get_metadata_text()
self.metadata_display.update(metadata_text)
logger.debug(f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}")
def _handle_export(self) -> None:
"""Handle the export action."""
try:
tags_text = self.tags_textarea.text.strip()
export_to = self.export_to_select.value if self.export_to_select else "0x0"
custom_path = self.custom_path_input.value.strip() if self.custom_path_input else ""
# Get library value - handle Select.BLANK case
library = "local" # default
if self.libraries_select and str(self.libraries_select.value) != "Select.BLANK":
library = str(self.libraries_select.value)
elif self.libraries_select:
# If value is Select.BLANK, try to get from the options
try:
# Get first available library option as fallback
options = self._get_library_options()
if options:
library = options[0][1] # Get the value part of first option tuple
except Exception:
library = "local"
size = self.size_input.value.strip() if self.size_input else ""
file_format = self.format_select.value if self.format_select else "mp4"
# Parse tags from textarea (one per line)
export_tags = set()
for line in tags_text.split('\n'):
tag = line.strip()
if tag:
export_tags.add(tag)
# For Hydrus export, filter out metadata-only tags (hash:, known_url:, relationship:)
if export_to == "libraries" and library == "hydrus":
metadata_prefixes = {'hash:', 'known_url:', 'relationship:'}
export_tags = {tag for tag in export_tags if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)}
logger.info(f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining")
# Extract title and add as searchable tags if not already present
title = self.result_data.get('title', '').strip()
if title:
# Add the full title as a tag if not already present
title_tag = f"title:{title}"
if title_tag not in export_tags and not any(t.startswith('title:') for t in export_tags):
export_tags.add(title_tag)
# Extract individual words from title as searchable tags (if reasonable length)
# Skip very short words and common stop words
if len(title) < 100: # Only for reasonably short titles
stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'in', 'to', 'for', 'is', 'it', 'at', 'by', 'from', 'with', 'as', 'be', 'on', 'that', 'this'}
words = title.lower().split()
for word in words:
# Clean up word (remove punctuation)
clean_word = ''.join(c for c in word if c.isalnum())
# Only add if not a stop word and has some length
if clean_word and len(clean_word) > 2 and clean_word not in stop_words:
if clean_word not in export_tags:
export_tags.add(clean_word)
logger.info(f"Extracted {len(words)} words from title, added searchable title tags")
# Validate required fields - allow export to continue for Hydrus even with 0 actual tags
# (metadata tags will still be in the sidecar, and tags can be added later)
if not export_tags and export_to != "libraries":
logger.warning("No tags provided for export")
return
if export_to == "libraries" and not export_tags:
logger.warning("No actual tags for Hydrus export (only metadata was present)")
# Don't return - allow export to continue, file will be added to Hydrus even without tags
# Determine export path
export_path = None
if export_to == "path":
if not custom_path:
logger.warning("Custom path required but not provided")
return
export_path = custom_path
elif export_to == "libraries":
export_path = library # "local", "hydrus", "debrid"
else:
export_path = export_to # "0x0"
# Get metadata from result_data
metadata = self.result_data.get('metadata', {})
# Extract file source info from result_data (passed by hub-ui)
file_hash = self.result_data.get('file_hash')
file_url = self.result_data.get('file_url')
file_path = self.result_data.get('file_path') # For local files
source = self.result_data.get('source', 'unknown')
# Prepare export data
export_data = {
'export_to': export_to,
'export_path': export_path,
'library': library if export_to == "libraries" else None,
'tags': export_tags,
'size': size if size else None,
'format': file_format,
'metadata': metadata,
'original_data': self.result_data,
'file_hash': file_hash,
'file_url': file_url,
'file_path': file_path, # Pass file path for local files
'source': source,
}
logger.info(f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}")
# Dismiss the modal and return the export data
self.dismiss(export_data)
except Exception as e:
logger.error(f"Error during export: {e}", exc_info=True)
def action_cancel(self) -> None:
"""Handle cancel action."""
self.dismiss(None)
def create_notes_sidecar(file_path: Path, notes: str) -> None:
"""Create a .notes sidecar file with notes text.
Only creates file if notes are not empty.
Args:
file_path: Path to the exported file
notes: Notes text
"""
if not notes or not notes.strip():
return
notes_path = file_path.with_suffix(file_path.suffix + '.notes')
try:
with open(notes_path, 'w', encoding='utf-8') as f:
f.write(notes.strip())
logger.info(f"Created notes sidecar: {notes_path}")
except Exception as e:
logger.error(f"Failed to create notes sidecar: {e}", exc_info=True)
def determine_needs_conversion(current_ext: str, target_format: str) -> bool:
"""Determine if conversion is needed between two formats.
Args:
current_ext: Current file extension (e.g., '.flac')
target_format: Target format name (e.g., 'mp3') or NoSelection object
Returns:
True if conversion is needed, False if it's already the target format
"""
# Handle NoSelection or None
if not target_format or target_format == "" or str(target_format.__class__.__name__) == 'NoSelection':
return False # No conversion requested
# Normalize the current extension
current_ext_lower = current_ext.lower().lstrip('.')
target_format_lower = str(target_format).lower()
# Check if they match
return current_ext_lower != target_format_lower
def calculate_size_tolerance(metadata: dict, user_size_mb: Optional[str]) -> tuple[Optional[int], Optional[int]]:
"""Calculate target size with 1MB grace period.
Args:
metadata: File metadata containing 'size' in bytes
user_size_mb: User-entered size like "756Mb" or empty string
Returns:
Tuple of (target_bytes, grace_bytes) where grace_bytes is always 1MB (1048576);
target_bytes is None if no size was specified
"""
grace_bytes = 1 * 1024 * 1024 # 1MB grace period
if not user_size_mb or not user_size_mb.strip():
return None, grace_bytes
try:
# Parse the size string (format like "756Mb")
size_str = user_size_mb.strip().lower()
if size_str.endswith('mb'):
size_str = size_str[:-2]
elif size_str.endswith('m'):
size_str = size_str[:-1]
size_mb = float(size_str)
target_bytes = int(size_mb * 1024 * 1024)
return target_bytes, grace_bytes
except (ValueError, AttributeError):
return None, grace_bytes

85
TUI/modalscreen/export.tcss Normal file
View File

@@ -0,0 +1,85 @@
/* Export Modal Screen Styling */
ExportModal {
align: center middle;
}
#export-container {
width: 140;
height: 55;
background: $panel;
border: solid $primary;
layout: grid;
grid-columns: 1fr 1fr 1fr;
grid-rows: auto 1fr auto;
}
#export-title {
height: 1;
text-align: center;
text-style: bold;
color: $accent;
background: $boost;
padding: 1 2;
column-span: 3;
}
/* Row 1: Three columns */
#tags-area {
height: 1fr;
column-span: 1;
border: solid mediumvioletred;
}
#metadata-display {
height: 1fr;
column-span: 1;
border: solid dodgerblue;
overflow: auto;
padding: 1;
}
#export-options {
height: 1fr;
column-span: 1;
border: solid mediumpurple;
layout: vertical;
padding: 1;
}
#export-options Select,
#export-options Input {
height: 3;
margin: 0 0 1 0;
}
#custom-path-input {
height: 3;
margin: 0 0 1 0;
}
#libraries-select {
height: 3;
margin: 0 0 1 0;
}
#size-input {
height: 3;
margin: 0 0 1 0;
}
#format-select {
height: 3;
}
/* Row 2: Buttons */
#export-buttons {
height: auto;
column-span: 3;
layout: horizontal;
}
#export-buttons Button {
width: 1fr;
margin: 0 1;
}

505
TUI/modalscreen/search.py Normal file
View File

@@ -0,0 +1,505 @@
"""Search modal screen for OpenLibrary and Soulseek."""
from textual.app import ComposeResult
from textual.screen import ModalScreen
from textual.containers import Container, Horizontal, Vertical
from textual.widgets import Static, Button, Input, Select, DataTable, TextArea
from textual.binding import Binding
from textual.message import Message
import logging
from typing import Optional, Any, List
from pathlib import Path
import sys
import asyncio
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from config import load_config
logger = logging.getLogger(__name__)
class SearchModal(ModalScreen):
"""Modal screen for searching OpenLibrary and Soulseek."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
Binding("enter", "search_focused", "Search"),
Binding("ctrl+t", "scrape_tags", "Scrape Tags"),
]
CSS_PATH = "search.tcss"
class SearchSelected(Message):
"""Posted when user selects a search result."""
def __init__(self, result: dict) -> None:
self.result = result
super().__init__()
def __init__(self, app_instance=None):
"""Initialize the search modal.
Args:
app_instance: Reference to the main App instance for worker creation
"""
super().__init__()
self.app_instance = app_instance
self.source_select: Optional[Select] = None
self.search_input: Optional[Input] = None
self.results_table: Optional[DataTable] = None
self.tags_textarea: Optional[TextArea] = None
self.library_source_select: Optional[Select] = None
self.current_results: List[dict] = []
self.is_searching = False
self.current_worker = None # Track worker for search operations
def compose(self) -> ComposeResult:
"""Create child widgets for the search modal."""
with Vertical(id="search-container"):
yield Static("Search Books & Music", id="search-title")
with Horizontal(id="search-controls"):
# Source selector
self.source_select = Select(
[("OpenLibrary", "openlibrary"), ("Soulseek", "soulseek")],
value="openlibrary",
id="source-select"
)
yield self.source_select
# Search input
self.search_input = Input(
placeholder="Enter search query...",
id="search-input"
)
yield self.search_input
# Search button
yield Button("Search", id="search-button", variant="primary")
# Results table
self.results_table = DataTable(id="results-table")
yield self.results_table
# Two-column layout: tags on left, source/submit on right
with Horizontal(id="bottom-controls"):
# Left column: Tags textarea
with Vertical(id="tags-column"):
self.tags_textarea = TextArea(
text="",
id="result-tags-textarea",
read_only=False
)
self.tags_textarea.border_title = "Tags [Ctrl+T: Scrape]"
yield self.tags_textarea
# Right column: Library source and submit button
with Vertical(id="source-submit-column"):
# Library source selector (for OpenLibrary results)
self.library_source_select = Select(
[("Local", "local"), ("Download", "download")],
value="local",
id="library-source-select"
)
yield self.library_source_select
# Submit button
yield Button("Submit", id="submit-button", variant="primary")
# Buttons at bottom
with Horizontal(id="search-buttons"):
yield Button("Select", id="select-button", variant="primary")
yield Button("Download", id="download-button", variant="primary")
yield Button("Cancel", id="cancel-button", variant="default")
def on_mount(self) -> None:
"""Set up the table columns and focus."""
# Set up results table columns
self.results_table.add_columns(
"Title",
"Author/Artist",
"Year/Album",
"Details"
)
# Focus on search input
self.search_input.focus()
async def _search_openlibrary(self, query: str) -> List[dict]:
"""Search OpenLibrary for books."""
try:
from helper.search_provider import get_provider
logger.info(f"[search-modal] Searching OpenLibrary for: {query}")
# Get the OpenLibrary provider (now has smart search built-in)
provider = get_provider("openlibrary")
if not provider:
logger.error("[search-modal] OpenLibrary provider not available")
return []
# Search using the provider (smart search is now default)
search_results = provider.search(query, limit=20)
formatted_results = []
for result in search_results:
# Extract metadata from SearchResult.full_metadata
metadata = result.full_metadata or {}
formatted_results.append({
"title": result.title,
"author": ", ".join(metadata.get("authors", [])) if metadata.get("authors") else "Unknown",
"year": metadata.get("year", ""),
"publisher": metadata.get("publisher", ""),
"isbn": metadata.get("isbn", ""),
"oclc": metadata.get("oclc", ""),
"lccn": metadata.get("lccn", ""),
"openlibrary_id": metadata.get("olid", ""),
"pages": metadata.get("pages", ""),
"language": metadata.get("language", ""),
"source": "openlibrary",
"columns": result.columns,
"raw_data": metadata
})
logger.info(f"[search-modal] Found {len(formatted_results)} OpenLibrary results")
return formatted_results
except Exception as e:
logger.error(f"[search-modal] OpenLibrary search error: {e}", exc_info=True)
import traceback
traceback.print_exc()
return []
async def _search_soulseek(self, query: str) -> List[dict]:
"""Search Soulseek for music with automatic worker tracking."""
try:
from helper.search_provider import get_provider
# Create worker for tracking
worker = None
if self.app_instance and hasattr(self.app_instance, 'create_worker'):
worker = self.app_instance.create_worker(
'soulseek',
title=f"Soulseek Search: {query[:40]}",
description=f"Searching P2P network for music"
)
self.current_worker = worker
if worker:
worker.log_step("Connecting to Soulseek peer network...")
logger.info(f"[search-modal] Searching Soulseek for: {query}")
provider = get_provider("soulseek")
search_results = provider.search(query, limit=20)
if worker:
worker.log_step(f"Search returned {len(search_results)} results")
logger.info(f"[search-modal] Found {len(search_results)} Soulseek results")
# Format results for display
formatted_results = []
for idx, result in enumerate(search_results):
metadata = result.full_metadata or {}
artist = metadata.get('artist', '')
album = metadata.get('album', '')
title = result.title
track_num = metadata.get('track_num', '')
size_bytes = result.size_bytes or 0
# Format size as human-readable
if size_bytes > 1024 * 1024:
size_str = f"{size_bytes / (1024 * 1024):.1f} MB"
elif size_bytes > 1024:
size_str = f"{size_bytes / 1024:.1f} KB"
else:
size_str = f"{size_bytes} B"
# Build columns for display
columns = [
("#", str(idx + 1)),
("Title", title[:50] if title else "Unknown"),
("Artist", artist[:30] if artist else "(no artist)"),
("Album", album[:30] if album else ""),
]
formatted_results.append({
"title": title if title else "Unknown",
"artist": artist if artist else "(no artist)",
"album": album,
"track": track_num,
"filesize": size_str,
"bitrate": "", # Not available in Soulseek results
"source": "soulseek",
"columns": columns,
"raw_data": result.to_dict()
})
return formatted_results
except Exception as e:
logger.error(f"[search-modal] Soulseek search error: {e}")
import traceback
traceback.print_exc()
return []
async def _perform_search(self) -> None:
"""Perform the actual search based on selected source."""
if not self.search_input or not self.source_select or not self.results_table:
logger.error("[search-modal] Widgets not initialized")
return
query = self.search_input.value.strip()
if not query:
logger.warning("[search-modal] Empty search query")
return
source = self.source_select.value
# Clear existing results
self.results_table.clear()
self.current_results = []
self.is_searching = True
try:
if source == "openlibrary":
results = await self._search_openlibrary(query)
elif source == "soulseek":
results = await self._search_soulseek(query)
else:
logger.warning(f"[search-modal] Unknown source: {source}")
if self.current_worker:
self.current_worker.finish("error", "Unknown search source")
return
self.current_results = results
# Populate table with results
if results:
# Check if first result has columns field
first_result = results[0]
if "columns" in first_result and first_result["columns"]:
# Use dynamic columns from result
# Clear existing columns and rebuild based on result columns
self.results_table.clear()
# Extract column headers from first result's columns field
column_headers = [col[0] for col in first_result["columns"]]
# Remove existing columns (we'll re-add them with the right headers)
# Note: This is a workaround since Textual's DataTable doesn't support dynamic column management well
# For now, we just use the dynamic column headers from the result
logger.info(f"[search-modal] Using dynamic columns: {column_headers}")
# Populate rows using the column order from results
for result in results:
if "columns" in result and result["columns"]:
# Extract values in column order
row_data = [col[1] for col in result["columns"]]
self.results_table.add_row(*row_data)
else:
# Fallback for results without columns
logger.warning(f"[search-modal] Result missing columns field: {result.get('title', 'Unknown')}")
else:
# Fallback to original hardcoded behavior if columns not available
logger.info("[search-modal] No dynamic columns found, using default formatting")
for result in results:
if source == "openlibrary":
# Format OpenLibrary results (original hardcoded)
year = str(result.get("year", ""))[:4] if result.get("year") else ""
details = f"ISBN: {result.get('isbn', '')}" if result.get('isbn') else ""
if result.get('openlibrary_id'):
details += f" | OL: {result.get('openlibrary_id')}"
row_data = [
result["title"][:60],
result["author"][:35],
year,
details[:40]
]
else: # soulseek
row_data = [
result["title"][:50],
result["artist"][:30],
result["album"][:30],
result['filesize']
]
self.results_table.add_row(*row_data)
else:
# Add a "no results" message
self.results_table.add_row("No results found", "", "", "")
# Finish worker if tracking
if self.current_worker:
self.current_worker.finish("completed", f"Found {len(results)} results")
except Exception as e:
logger.error(f"[search-modal] Search error: {e}")
if self.current_worker:
self.current_worker.finish("error", f"Search failed: {str(e)}")
finally:
self.is_searching = False
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
button_id = event.button.id
if button_id == "search-button":
# Run search asynchronously
asyncio.create_task(self._perform_search())
elif button_id == "select-button":
# Get selected row and populate tags textarea
if self.results_table and self.results_table.row_count > 0:
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
# Populate tags textarea with result metadata
self._populate_tags_from_result(result)
else:
logger.warning("[search-modal] No results to select")
elif button_id == "download-button":
# Download the selected result
if self.current_results and self.results_table.row_count > 0:
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
if result.get("source") == "openlibrary":
asyncio.create_task(self._download_book(result))
else:
logger.warning("[search-modal] Download only supported for OpenLibrary results")
else:
logger.warning("[search-modal] No result selected for download")
elif button_id == "submit-button":
# Submit the current result with tags and source
if self.current_results and self.results_table.row_count > 0:
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
# Get tags from textarea
tags_text = self.tags_textarea.text if self.tags_textarea else ""
# Get library source (if OpenLibrary)
library_source = self.library_source_select.value if self.library_source_select else "local"
# Add tags and source to result
result["tags_text"] = tags_text
result["library_source"] = library_source
# Post message and dismiss
self.post_message(self.SearchSelected(result))
self.dismiss(result)
else:
logger.warning("[search-modal] No result selected for submission")
elif button_id == "cancel-button":
self.dismiss(None)
def _populate_tags_from_result(self, result: dict) -> None:
"""Populate the tags textarea from a selected result."""
if not self.tags_textarea:
return
# Format tags based on result source
if result.get("source") == "openlibrary":
# For OpenLibrary: title, author, year
title = result.get("title", "")
author = result.get("author", "")
year = result.get("year", "")
tags = []
if title:
tags.append(title)
if author:
tags.append(author)
if year:
tags.append(year)
tags_text = "\n".join(tags)
else: # soulseek
# For Soulseek: artist, album, title, track
tags = []
if result.get("artist"):
tags.append(result["artist"])
if result.get("album"):
tags.append(result["album"])
if result.get("track"):
tags.append(f"Track {result['track']}")
if result.get("title"):
tags.append(result["title"])
tags_text = "\n".join(tags)
self.tags_textarea.text = tags_text
logger.info(f"[search-modal] Populated tags textarea from result")
async def _download_book(self, result: dict) -> None:
"""Download a book from OpenLibrary using unified downloader."""
try:
from helper.unified_book_downloader import UnifiedBookDownloader
from config import load_config
logger.info(f"[search-modal] Starting download for: {result.get('title')}")
config = load_config()
downloader = UnifiedBookDownloader(config=config)
# Get download options for this book
options = downloader.get_download_options(result)
if not options['methods']:
logger.warning(f"[search-modal] No download methods available for: {result.get('title')}")
# Could show a modal dialog here
return
# For now, use the first available method (we could show a dialog to choose)
method = options['methods'][0]
logger.info(f"[search-modal] Using download method: {method.get('label')}")
# Perform the download
success, message = await downloader.download_book(method)
if success:
logger.info(f"[search-modal] Download successful: {message}")
# Could show success dialog
else:
logger.warning(f"[search-modal] Download failed: {message}")
# Could show error dialog
downloader.close()
except Exception as e:
logger.error(f"[search-modal] Download error: {e}", exc_info=True)
def action_search_focused(self) -> None:
"""Action for Enter key - only search if search input is focused."""
if self.search_input and self.search_input.has_focus and not self.is_searching:
asyncio.create_task(self._perform_search())
def action_scrape_tags(self) -> None:
"""Action for Ctrl+T - populate tags from selected result."""
if self.current_results and self.results_table and self.results_table.row_count > 0:
try:
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
self._populate_tags_from_result(result)
logger.info(f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}")
else:
logger.warning(f"[search-modal] Ctrl+T: Invalid row index {selected_row}")
except Exception as e:
logger.error(f"[search-modal] Ctrl+T error: {e}")
else:
logger.warning("[search-modal] Ctrl+T: No results selected")
def action_cancel(self) -> None:
"""Action for Escape key - close modal."""
self.dismiss(None)
def on_input_submitted(self, event: Input.Submitted) -> None:
"""Handle Enter key in search input - only trigger search here."""
if event.input.id == "search-input":
if not self.is_searching:
asyncio.create_task(self._perform_search())

121
TUI/modalscreen/search.tcss Normal file
View File

@@ -0,0 +1,121 @@
/* Search Modal Screen Styling */
SearchModal {
align: center middle;
}
Screen {
layout: vertical;
}
#search-container {
width: 140;
height: 40;
background: $panel;
border: solid $primary;
layout: vertical;
}
Static#search-title {
height: 3;
dock: top;
text-align: center;
text-style: bold;
color: $accent;
background: $boost;
padding: 1 2;
}
#search-controls {
height: auto;
layout: horizontal;
padding: 1;
border: solid $primary;
}
#source-select {
width: 20;
margin-right: 1;
}
#search-input {
width: 1fr;
margin-right: 1;
}
#search-button {
width: 12;
}
#results-table {
height: 1fr;
border: solid $primary;
}
DataTable {
border: solid $accent;
}
DataTable > .datatable--header {
background: $boost;
color: $accent;
text-style: bold;
}
DataTable > .datatable--cursor-row {
background: $accent;
}
#bottom-controls {
height: auto;
layout: horizontal;
padding: 1;
border: solid $primary;
}
#tags-column {
width: 1fr;
layout: vertical;
padding-right: 1;
height: auto;
}
#result-tags-textarea {
height: 10;
width: 1fr;
border: solid $accent;
}
#source-submit-column {
width: 20;
layout: vertical;
padding-left: 1;
height: auto;
}
#library-source-select {
width: 1fr;
margin-bottom: 1;
}
#submit-button {
width: 1fr;
}
#search-buttons {
height: 3;
dock: bottom;
layout: horizontal;
padding: 1;
border: solid $primary;
align: center middle;
}
#select-button {
width: 12;
margin-right: 2;
}
#cancel-button {
width: 12;
}

585
TUI/modalscreen/workers.py Normal file
View File

@@ -0,0 +1,585 @@
"""Workers modal screen for monitoring and managing background tasks."""
from textual.app import ComposeResult
from textual.screen import ModalScreen
from textual.containers import Horizontal, Vertical
from textual.widgets import Static, Button, DataTable, TextArea
from textual.binding import Binding
from textual.message import Message
import logging
from typing import Optional, Dict, List, Any
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
logger = logging.getLogger(__name__)
class WorkersModal(ModalScreen):
"""Modal screen for monitoring running and finished workers."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
]
CSS_PATH = "workers.tcss"
class WorkerUpdated(Message):
"""Posted when worker list is updated."""
def __init__(self, workers: List[Dict[str, Any]]) -> None:
self.workers = workers
super().__init__()
class WorkerCancelled(Message):
"""Posted when user cancels a worker."""
def __init__(self, worker_id: str) -> None:
self.worker_id = worker_id
super().__init__()
def __init__(self, app_instance=None):
"""Initialize the workers modal.
Args:
app_instance: Reference to the hub app for accessing worker info
"""
super().__init__()
self.app_instance = app_instance
self.running_table: Optional[DataTable] = None
self.finished_table: Optional[DataTable] = None
self.stdout_display: Optional[TextArea] = None
self.running_workers: List[Dict[str, Any]] = []
self.finished_workers: List[Dict[str, Any]] = []
self.selected_worker_id: Optional[str] = None
self.show_running = False # Start with finished tab
def compose(self) -> ComposeResult:
"""Create child widgets for the workers modal."""
with Vertical(id="workers-container"):
# Title with toggle buttons
with Horizontal(id="workers-title-bar"):
yield Static("Workers Monitor", id="workers-title")
yield Button("Running", id="toggle-running-btn", variant="primary")
yield Button("Finished", id="toggle-finished-btn", variant="default")
# Running tab content (initially hidden)
with Vertical(id="running-section"):
self.running_table = DataTable(id="running-table")
yield self.running_table
with Horizontal(id="running-controls"):
yield Button("Refresh", id="running-refresh-btn", variant="primary")
yield Button("Stop Selected", id="running-stop-btn", variant="warning")
yield Button("Stop All", id="running-stop-all-btn", variant="error")
# Finished tab content (initially visible)
with Vertical(id="finished-section"):
self.finished_table = DataTable(id="finished-table")
yield self.finished_table
with Horizontal(id="finished-controls"):
yield Button("Refresh", id="finished-refresh-btn", variant="primary")
yield Button("Clear Selected", id="finished-clear-btn", variant="warning")
yield Button("Clear All", id="finished-clear-all-btn", variant="error")
# Shared textarea for displaying worker logs
with Vertical(id="logs-section"):
yield Static("Worker Logs:", id="logs-label")
self.stdout_display = TextArea(id="stdout-display", read_only=True)
yield self.stdout_display
with Horizontal(id="workers-buttons"):
yield Button("Close", id="close-btn", variant="primary")
def on_mount(self) -> None:
"""Set up the tables and load worker data."""
# Set up running workers table
if self.running_table:
self.running_table.add_columns(
"ID",
"Type",
"Status",
"Pipe",
"Progress",
"Started",
"Details"
)
self.running_table.zebra_stripes = True
# Set up finished workers table
if self.finished_table:
self.finished_table.add_columns(
"ID",
"Type",
"Result",
"Pipe",
"Started",
"Completed",
"Duration",
"Details"
)
self.finished_table.zebra_stripes = True
# Set initial view (show finished by default)
self._update_view_visibility()
# Load initial data
self.refresh_workers()
# Don't set up periodic refresh - it was causing issues with stdout display
# Users can click the Refresh button to update manually
def refresh_workers(self) -> None:
"""Refresh the workers data from app instance."""
try:
if not self.app_instance:
logger.warning("[workers-modal] No app instance provided")
return
# Get running workers from app instance
# This assumes the app has a get_running_workers() method
if hasattr(self.app_instance, 'get_running_workers'):
self.running_workers = self.app_instance.get_running_workers()
else:
self.running_workers = []
# Get finished workers from app instance
if hasattr(self.app_instance, 'get_finished_workers'):
self.finished_workers = self.app_instance.get_finished_workers()
if self.finished_workers:
logger.info(f"[workers-modal-refresh] Got {len(self.finished_workers)} finished workers from app")
# Log the keys in the first worker to verify structure
if isinstance(self.finished_workers[0], dict):
logger.info(f"[workers-modal-refresh] First worker keys: {list(self.finished_workers[0].keys())}")
logger.info(f"[workers-modal-refresh] First worker: {self.finished_workers[0]}")
else:
logger.warning(f"[workers-modal-refresh] First worker is not a dict: {type(self.finished_workers[0])}")
else:
self.finished_workers = []
# Update tables
self._update_running_table()
self._update_finished_table()
logger.info(f"[workers-modal] Refreshed: {len(self.running_workers)} running, {len(self.finished_workers)} finished")
except Exception as e:
logger.error(f"[workers-modal] Error refreshing workers: {e}")
def _update_view_visibility(self) -> None:
"""Toggle visibility between running and finished views."""
try:
running_section = self.query_one("#running-section", Vertical)
finished_section = self.query_one("#finished-section", Vertical)
toggle_running_btn = self.query_one("#toggle-running-btn", Button)
toggle_finished_btn = self.query_one("#toggle-finished-btn", Button)
if self.show_running:
running_section.display = True
finished_section.display = False
toggle_running_btn.variant = "primary"
toggle_finished_btn.variant = "default"
logger.debug("[workers-modal] Switched to Running view")
else:
running_section.display = False
finished_section.display = True
toggle_running_btn.variant = "default"
toggle_finished_btn.variant = "primary"
logger.debug("[workers-modal] Switched to Finished view")
except Exception as e:
logger.error(f"[workers-modal] Error updating view visibility: {e}")
def _update_running_table(self) -> None:
"""Update the running workers table."""
try:
if not self.running_table:
logger.error("[workers-modal] Running table not initialized")
return
self.running_table.clear()
if not self.running_workers:
self.running_table.add_row("---", "---", "---", "---", "---", "---", "No workers running")
logger.debug(f"[workers-modal] No running workers to display")
return
logger.debug(f"[workers-modal] Updating running table with {len(self.running_workers)} workers")
for idx, worker_info in enumerate(self.running_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
status = worker_info.get('status', 'running')
progress = worker_info.get('progress', '')
started = worker_info.get('started', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
status = str(status) if status else 'running'
progress = str(progress) if progress else '---'
started = str(started) if started else '---'
details = str(details) if details else '---'
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
progress = progress[:20]
started = started[:19]
details = details[:30]
pipe_display = pipe_display[:40]
self.running_table.add_row(
worker_id[:8],
worker_type[:15],
status[:10],
pipe_display,
progress,
started,
details
)
if idx == 0: # Log first entry
logger.debug(f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}")
except Exception as row_error:
logger.error(f"[workers-modal] Error adding running row {idx}: {row_error}", exc_info=True)
logger.debug(f"[workers-modal] Updated running table with {len(self.running_workers)} workers")
except Exception as e:
logger.error(f"[workers-modal] Error updating running table: {e}", exc_info=True)
def _update_finished_table(self) -> None:
"""Update the finished workers table."""
try:
if not self.finished_table:
logger.error("[workers-modal] Finished table not initialized")
return
self.finished_table.clear()
if not self.finished_workers:
self.finished_table.add_row("---", "---", "---", "---", "---", "---", "---", "No finished workers")
logger.debug(f"[workers-modal] No finished workers to display")
return
logger.info(f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers")
added_count = 0
error_count = 0
for idx, worker_info in enumerate(self.finished_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
result = worker_info.get('result', 'unknown')
completed = worker_info.get('completed', '')
duration = worker_info.get('duration', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
started = worker_info.get('started', '')
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
result = str(result) if result else 'unknown'
completed = str(completed) if completed else '---'
duration = str(duration) if duration else '---'
details = str(details) if details else '---'
started = str(started) if started else '---'
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
result = result[:15]
completed = completed[:19]
started = started[:19]
duration = duration[:10]
details = details[:30]
pipe_display = pipe_display[:40]
self.finished_table.add_row(
worker_id[:8],
worker_type[:15],
result,
pipe_display,
started,
completed,
duration,
details
)
added_count += 1
except Exception as row_error:
error_count += 1
logger.error(f"[workers-modal-update] Error adding finished row {idx}: {row_error}", exc_info=True)
logger.info(f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})")
logger.debug(f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}")
except Exception as e:
logger.error(f"[workers-modal] Error updating finished table: {e}", exc_info=True)
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
"""Handle row highlight in tables - display stdout."""
try:
logger.info(f"[workers-modal] Row highlighted, cursor_row: {event.cursor_row}")
# Get the selected worker from the correct table
workers_list = None
if event.control == self.running_table:
workers_list = self.running_workers
logger.debug(f"[workers-modal] Highlighted in running table")
elif event.control == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}")
else:
logger.warning(f"[workers-modal] Unknown table: {event.control}")
return
# Get the worker at this row
if workers_list and 0 <= event.cursor_row < len(workers_list):
worker = workers_list[event.cursor_row]
worker_id = worker.get('id', '')
logger.info(f"[workers-modal] Highlighted worker: {worker_id}")
if worker_id:
self.selected_worker_id = worker_id
# Display the stdout
self._update_stdout_display(worker_id, worker)
else:
logger.warning(f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}")
except Exception as e:
logger.error(f"[workers-modal] Error handling row highlight: {e}", exc_info=True)
def on_data_table_cell_highlighted(self, event: DataTable.CellHighlighted) -> None:
"""Handle cell highlight in tables - display stdout (backup for row selection)."""
try:
# CellHighlighted has coordinate (row, column) not cursor_row
cursor_row = event.coordinate.row
logger.debug(f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}")
# Get the selected worker from the correct table
workers_list = None
if event.data_table == self.running_table:
workers_list = self.running_workers
logger.debug(f"[workers-modal] Cell highlighted in running table")
elif event.data_table == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}")
else:
return
# Get the worker at this row
if workers_list and 0 <= cursor_row < len(workers_list):
worker = workers_list[cursor_row]
worker_id = worker.get('id', '')
if worker_id and worker_id != self.selected_worker_id:
logger.info(f"[workers-modal] Cell-highlighted worker: {worker_id}")
self.selected_worker_id = worker_id
# Display the stdout
self._update_stdout_display(worker_id, worker)
except Exception as e:
logger.debug(f"[workers-modal] Error handling cell highlight: {e}")
def _update_stdout_display(self, worker_id: str, worker: Optional[Dict[str, Any]] = None) -> None:
"""Update the stdout textarea with logs from the selected worker."""
try:
if not self.stdout_display:
logger.error("[workers-modal] stdout_display not initialized")
return
logger.debug(f"[workers-modal] Updating stdout display for worker: {worker_id}")
worker_data = worker or self._locate_worker(worker_id)
stdout_text = self._resolve_worker_stdout(worker_id, worker_data)
pipe_text = self._resolve_worker_pipe(worker_id, worker_data)
events = self._get_worker_events(worker_id)
timeline_text = self._format_worker_timeline(events)
sections = []
if pipe_text:
sections.append(f"Pipe:\n{pipe_text}")
if timeline_text:
sections.append("Timeline:\n" + timeline_text)
logs_body = (stdout_text or "").strip()
sections.append("Logs:\n" + (logs_body if logs_body else "(no logs recorded)"))
combined_text = "\n\n".join(sections)
logger.debug(f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})")
self.stdout_display.text = combined_text
if len(combined_text) > 10:
try:
# Move the cursor to the last line so the newest output stays in view.
self.stdout_display.cursor_location = (combined_text.count("\n"), 0)
except Exception:
pass
logger.info(f"[workers-modal] Updated stdout display successfully")
except Exception as e:
logger.error(f"[workers-modal] Error updating stdout display: {e}", exc_info=True)
def _locate_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
for worker in self.running_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
return worker
for worker in self.finished_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
return worker
return None
def _resolve_worker_stdout(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('stdout'):
return worker.get('stdout', '') or ''
manager = getattr(self.app_instance, 'worker_manager', None)
if manager:
try:
return manager.get_stdout(worker_id) or ''
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch stdout for {worker_id}: {exc}")
return ''
def _resolve_worker_pipe(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('pipe'):
return str(worker.get('pipe'))
record = self._fetch_worker_record(worker_id)
if record and record.get('pipe'):
return str(record.get('pipe'))
return ''
def _fetch_worker_record(self, worker_id: str) -> Optional[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
if not manager:
return None
try:
return manager.get_worker(worker_id)
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch worker record {worker_id}: {exc}")
return None
def _get_worker_events(self, worker_id: str, limit: int = 250) -> List[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
if not manager:
return []
try:
return manager.get_worker_events(worker_id, limit=limit)
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch worker events {worker_id}: {exc}")
return []
def _format_worker_timeline(self, events: List[Dict[str, Any]]) -> str:
if not events:
return ""
lines: List[str] = []
for event in events:
timestamp = self._format_event_timestamp(event.get('created_at'))
label = (event.get('event_type') or '').upper() or 'EVENT'
channel = (event.get('channel') or '').upper()
if channel and channel not in label:
label = f"{label}/{channel}"
step = event.get('step') or ''
message = event.get('message') or ''
prefix = ''
if event.get('event_type') == 'step' and step:
prefix = f"{step} :: "
elif step and step not in message:
prefix = f"{step} :: "
formatted_message = self._format_message_block(message)
lines.append(f"[{timestamp}] {label}: {prefix}{formatted_message}")
return "\n".join(lines)
def _format_event_timestamp(self, raw_timestamp: Any) -> str:
if not raw_timestamp:
return "--:--:--"
text = str(raw_timestamp)
if "T" in text:
time_part = text.split("T", 1)[1]
elif " " in text:
time_part = text.split(" ", 1)[1]
else:
time_part = text
return time_part[:8] if len(time_part) >= 8 else time_part
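# Example: "2025-11-25T20:09:33.123456" -> time part "20:09:33.123456"
# -> trimmed to "20:09:33"; a bare "20:09:33" passes through unchanged.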
def _format_message_block(self, message: str) -> str:
clean = (message or '').strip()
if not clean:
return "(empty)"
lines = clean.splitlines()
if len(lines) == 1:
return lines[0]
head, *rest = lines
indented = "\n".join(f" {line}" for line in rest)
return f"{head}\n{indented}"
def _summarize_pipe(self, pipe_value: Any, limit: int = 40) -> str:
text = str(pipe_value or '').strip()
if not text:
return "(none)"
return text if len(text) <= limit else text[: limit - 3] + '...'
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
button_id = event.button.id
try:
if button_id == "toggle-running-btn":
self.show_running = True
self._update_view_visibility()
return
elif button_id == "toggle-finished-btn":
self.show_running = False
self._update_view_visibility()
return
if button_id == "running-refresh-btn":
self.refresh_workers()
elif button_id == "running-stop-btn":
# Stop selected running worker
if self.running_table and self.running_table.row_count > 0:
try:
selected_row = self.running_table.cursor_row
if 0 <= selected_row < len(self.running_workers):
worker = self.running_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'stop_worker'):
self.app_instance.stop_worker(worker_id)
logger.info(f"[workers-modal] Stopped worker: {worker_id}")
self.refresh_workers()
except Exception as e:
logger.error(f"[workers-modal] Error stopping worker: {e}")
elif button_id == "running-stop-all-btn":
# Stop all running workers
if self.app_instance and hasattr(self.app_instance, 'stop_all_workers'):
self.app_instance.stop_all_workers()
logger.info("[workers-modal] Stopped all workers")
self.refresh_workers()
elif button_id == "finished-refresh-btn":
self.refresh_workers()
elif button_id == "finished-clear-btn":
# Clear selected finished worker
if self.finished_table and self.finished_table.row_count > 0:
try:
selected_row = self.finished_table.cursor_row
if 0 <= selected_row < len(self.finished_workers):
worker = self.finished_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'clear_finished_worker'):
self.app_instance.clear_finished_worker(worker_id)
logger.info(f"[workers-modal] Cleared worker: {worker_id}")
self.refresh_workers()
except Exception as e:
logger.error(f"[workers-modal] Error clearing worker: {e}")
elif button_id == "finished-clear-all-btn":
# Clear all finished workers
if self.app_instance and hasattr(self.app_instance, 'clear_all_finished_workers'):
self.app_instance.clear_all_finished_workers()
logger.info("[workers-modal] Cleared all finished workers")
self.refresh_workers()
elif button_id == "close-btn":
self.dismiss(None)
except Exception as e:
logger.error(f"[workers-modal] Error in on_button_pressed: {e}")
def action_cancel(self) -> None:
"""Action for Escape key - close modal."""
self.dismiss(None)

View File

@@ -0,0 +1,119 @@
/* Workers Modal Stylesheet */
Screen {
background: $surface;
color: $text;
}
#workers-container {
width: 100%;
height: 100%;
layout: vertical;
background: $panel;
}
#workers-title-bar {
dock: top;
height: 3;
layout: horizontal;
background: $boost;
border: solid $accent;
padding: 0 1;
}
#workers-title {
width: 1fr;
height: 100%;
content-align-vertical: middle;
color: $text;
text-style: bold;
}
#toggle-running-btn,
#toggle-finished-btn {
width: auto;
height: 100%;
margin: 0;
}
#running-section,
#finished-section {
width: 100%;
height: 40%;
layout: vertical;
border: solid $accent;
}
#running-table,
#finished-table {
width: 100%;
height: 1fr;
border: solid $accent;
}
#running-controls,
#finished-controls {
width: 100%;
height: auto;
min-height: 3;
layout: horizontal;
background: $boost;
padding: 1;
border-top: solid $accent;
}
#running-controls Button,
#finished-controls Button {
margin-right: 1;
min-width: 15;
}
#logs-label {
height: 1;
margin: 0 1;
text-style: bold;
}
#logs-section {
width: 100%;
height: 1fr;
layout: vertical;
border: solid $accent;
background: $panel;
}
#stdout-display {
width: 100%;
height: 1fr;
border: solid $accent;
margin: 1;
}
#workers-buttons {
dock: bottom;
height: auto;
min-height: 3;
layout: horizontal;
border: solid $accent;
padding: 1;
}
#workers-buttons Button {
margin-right: 1;
min-width: 15;
}
DataTable {
border: solid $accent;
}
DataTable > .datatable--header {
background: $boost;
color: $text;
text-style: bold;
}
DataTable > .datatable--cursor {
background: $accent;
color: $panel;
}

356
TUI/pipeline_runner.py Normal file
View File

@@ -0,0 +1,356 @@
"""Pipeline execution utilities for the Textual UI.
This module mirrors the CLI pipeline behaviour while exposing a class-based
interface that the TUI can call. It keeps all pipeline/cmdlet integration in
one place so the interface layer stays focused on presentation.
"""
from __future__ import annotations
import contextlib
import io
import shlex
import uuid
from dataclasses import dataclass, field
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (ROOT_DIR, BASE_DIR):
str_path = str(path)
if str_path not in sys.path:
sys.path.insert(0, str_path)
import pipeline as ctx
from cmdlets import REGISTRY
from config import get_local_storage_path, load_config
from helper.worker_manager import WorkerManager
try: # Reuse the CLI selection parser instead of reimplementing it.
from CLI import _parse_selection_syntax
except ImportError: # pragma: no cover - fallback for atypical environments
_parse_selection_syntax = None # type: ignore
@dataclass(slots=True)
class PipelineStageResult:
"""Summary for a single pipeline stage."""
name: str
args: Sequence[str]
emitted: List[Any] = field(default_factory=list)
status: str = "pending"
error: Optional[str] = None
@dataclass(slots=True)
class PipelineRunResult:
"""Aggregate result for a pipeline run."""
pipeline: str
success: bool
stages: List[PipelineStageResult] = field(default_factory=list)
emitted: List[Any] = field(default_factory=list)
stdout: str = ""
stderr: str = ""
error: Optional[str] = None
def to_summary(self) -> Dict[str, Any]:
"""Provide a JSON-friendly representation for logging or UI."""
return {
"pipeline": self.pipeline,
"success": self.success,
"error": self.error,
"stages": [
{
"name": stage.name,
"status": stage.status,
"error": stage.error,
"emitted": len(stage.emitted),
}
for stage in self.stages
],
}
class PipelineExecutor:
"""Thin wrapper over the cmdlet registry + pipeline context."""
def __init__(
self,
*,
config: Optional[Dict[str, Any]] = None,
worker_manager: Optional[WorkerManager] = None,
) -> None:
self._config = config or load_config()
self._worker_manager = worker_manager
if self._worker_manager is None:
self._worker_manager = self._ensure_worker_manager()
if self._worker_manager:
self._config["_worker_manager"] = self._worker_manager
@property
def worker_manager(self) -> Optional[WorkerManager]:
return self._worker_manager
def run_pipeline(
self,
pipeline_text: str,
*,
on_log: Optional[Callable[[str], None]] = None,
) -> PipelineRunResult:
"""Execute a pipeline string and return structured results.
Args:
pipeline_text: Raw pipeline text entered by the user.
on_log: Optional callback that receives human-readable log lines.
"""
normalized = pipeline_text.strip()
result = PipelineRunResult(pipeline=normalized, success=False)
if not normalized:
result.error = "Pipeline is empty"
return result
tokens = self._tokenize(normalized)
stages = self._split_stages(tokens)
if not stages:
result.error = "Pipeline contains no stages"
return result
ctx.reset()
ctx.set_current_command_text(normalized)
stdout_buffer = io.StringIO()
stderr_buffer = io.StringIO()
piped_result: Any = None
worker_session = self._start_worker_session(normalized)
try:
with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
stderr_buffer
):
for index, stage_tokens in enumerate(stages):
stage = self._execute_stage(
index=index,
total=len(stages),
stage_tokens=stage_tokens,
piped_input=piped_result,
on_log=on_log,
)
result.stages.append(stage)
if stage.status != "completed":
result.error = stage.error or f"Stage {stage.name} failed"
return result
if index == len(stages) - 1:
result.emitted = stage.emitted
else:
piped_result = stage.emitted
result.success = True
return result
finally:
result.stdout = stdout_buffer.getvalue()
result.stderr = stderr_buffer.getvalue()
ctx.clear_current_command_text()
if worker_session is not None:
status = "completed" if result.success else "error"
worker_session.finish(status=status, message=result.error or "")
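# Typical use from non-UI code (a sketch; the pipeline text is a placeholder
# and assumes the named cmdlets are registered):
#
#   executor = PipelineExecutor()
#   run = executor.run_pipeline('download-data "<url>" | add-file -storage local')
#   if not run.success:
#       print(run.error)
#   print(run.to_summary())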
# ------------------------------------------------------------------
# Stage execution helpers
# ------------------------------------------------------------------
def _execute_stage(
self,
*,
index: int,
total: int,
stage_tokens: Sequence[str],
piped_input: Any,
on_log: Optional[Callable[[str], None]],
) -> PipelineStageResult:
if not stage_tokens:
return PipelineStageResult(name="(empty)", args=[], status="skipped")
cmd_name = stage_tokens[0].replace("_", "-").lower()
stage_args = stage_tokens[1:]
stage = PipelineStageResult(name=cmd_name, args=stage_args)
if cmd_name.startswith("@"):
return self._apply_selection_stage(
token=cmd_name,
stage=stage,
piped_input=piped_input,
on_log=on_log,
)
cmd_fn = REGISTRY.get(cmd_name)
if not cmd_fn:
stage.status = "failed"
stage.error = f"Unknown command: {cmd_name}"
return stage
pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
ctx.set_stage_context(pipeline_ctx)
ctx.set_active(True)
ctx.set_last_stage(index == total - 1)
try:
return_code = cmd_fn(piped_input, list(stage_args), self._config)
except Exception as exc: # pragma: no cover - surfaced in UI
stage.status = "failed"
stage.error = f"{type(exc).__name__}: {exc}"
if on_log:
on_log(stage.error)
return stage
finally:
ctx.set_stage_context(None)
ctx.set_active(False)
emitted = list(getattr(pipeline_ctx, "emits", []) or [])
stage.emitted = emitted
if return_code != 0:
stage.status = "failed"
stage.error = f"Exit code {return_code}"
else:
stage.status = "completed"
stage.error = None
worker_id = self._current_worker_id()
if self._worker_manager and worker_id:
label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
self._worker_manager.log_step(worker_id, label)
ctx.set_last_result_table(None, emitted)
ctx.set_last_items(emitted)
return stage
def _apply_selection_stage(
self,
*,
token: str,
stage: PipelineStageResult,
piped_input: Any,
on_log: Optional[Callable[[str], None]],
) -> PipelineStageResult:
selection = self._parse_selection(token)
items = piped_input or []
if not isinstance(items, list):
items = list(items if isinstance(items, Sequence) else [items])
if not items:
stage.status = "failed"
stage.error = "Selection requested but there is no upstream data"
return stage
if selection is None:
stage.emitted = list(items)
else:
zero_based = sorted(i - 1 for i in selection if i > 0)
stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]
if not stage.emitted:
stage.status = "failed"
stage.error = "Selection matched no rows"
return stage
ctx.set_last_items(stage.emitted)
ctx.set_last_result_table(None, stage.emitted)
stage.status = "completed"
if on_log:
on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
return stage
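# Example: with upstream items [a, b, c], the token "@2" parses to the
# 1-based selection {2}, which maps to zero-based index 1 and emits [b].
# If the token cannot be parsed into indices (parser unavailable or no
# digits), selection is None and all upstream items pass through.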
# ------------------------------------------------------------------
# Worker/session helpers
# ------------------------------------------------------------------
def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
manager = self._ensure_worker_manager()
if manager is None:
return None
worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
tracked = manager.track_worker(
worker_id,
worker_type="pipeline",
title="Pipeline run",
description=pipeline_text,
pipe=pipeline_text,
)
if not tracked:
return None
manager.log_step(worker_id, "Pipeline started")
self._config["_current_worker_id"] = worker_id
return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)
def _ensure_worker_manager(self) -> Optional[WorkerManager]:
if self._worker_manager:
return self._worker_manager
library_root = get_local_storage_path(self._config)
if not library_root:
return None
try:
self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
self._config["_worker_manager"] = self._worker_manager
except Exception:
self._worker_manager = None
return self._worker_manager
def _current_worker_id(self) -> Optional[str]:
worker_id = self._config.get("_current_worker_id")
return str(worker_id) if worker_id else None
# ------------------------------------------------------------------
# Parsing helpers
# ------------------------------------------------------------------
@staticmethod
def _tokenize(pipeline_text: str) -> List[str]:
try:
return shlex.split(pipeline_text)
except ValueError:
return pipeline_text.split()
@staticmethod
def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
stages: List[List[str]] = []
current: List[str] = []
for token in tokens:
if token == "|":
if current:
stages.append(current)
current = []
else:
current.append(token)
if current:
stages.append(current)
return stages
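# Example: 'download-data "<url>" | add-file -storage local' tokenizes to
# ['download-data', '<url>', '|', 'add-file', '-storage', 'local'] and then
# splits into stages [['download-data', '<url>'],
#                     ['add-file', '-storage', 'local']].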
@staticmethod
def _parse_selection(token: str) -> Optional[Sequence[int]]:
if _parse_selection_syntax:
parsed = _parse_selection_syntax(token)
if parsed:
return sorted(parsed)
return None
class _WorkerSession:
"""Minimal worker session wrapper for the TUI executor."""
def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
self._manager = manager
self.worker_id = worker_id
self._config = config
def finish(self, *, status: str, message: str) -> None:
try:
self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
self._manager.log_step(self.worker_id, f"Pipeline {status}")
except Exception:
pass
if self._config and self._config.get("_current_worker_id") == self.worker_id:
self._config.pop("_current_worker_id", None)

332
TUI/tui.py Normal file
View File

@@ -0,0 +1,332 @@
"""Modern Textual UI for driving Medeia-Macina pipelines."""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from textual import work
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual.containers import Container, Horizontal, Vertical, VerticalScroll
from textual.widgets import (
Button,
DataTable,
Footer,
Header,
Input,
ListItem,
ListView,
Static,
TextArea,
Tree,
)
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (BASE_DIR, ROOT_DIR):
str_path = str(path)
if str_path not in sys.path:
sys.path.insert(0, str_path)
from menu_actions import ( # type: ignore # noqa: E402
PIPELINE_PRESETS,
PipelinePreset,
build_metadata_snapshot,
summarize_result,
)
from pipeline_runner import PipelineExecutor, PipelineRunResult # type: ignore # noqa: E402
class PresetListItem(ListItem):
"""List entry that stores its pipeline preset."""
def __init__(self, preset: PipelinePreset) -> None:
super().__init__(
Static(
f"[b]{preset.label}[/b]\n[pale_green4]{preset.description}[/pale_green4]",
classes="preset-entry",
)
)
self.preset = preset
class PipelineHubApp(App):
"""Textual front-end that executes cmdlet pipelines inline."""
CSS_PATH = "tui.tcss"
BINDINGS = [
Binding("ctrl+enter", "run_pipeline", "Run Pipeline"),
Binding("f5", "refresh_workers", "Refresh Workers"),
Binding("ctrl+l", "focus_command", "Focus Input", show=False),
]
def __init__(self) -> None:
super().__init__()
self.executor = PipelineExecutor()
self.result_items: List[Any] = []
self.log_lines: List[str] = []
self.command_input: Optional[Input] = None
self.log_output: Optional[TextArea] = None
self.results_table: Optional[DataTable] = None
self.metadata_tree: Optional[Tree] = None
self.worker_table: Optional[DataTable] = None
self.preset_list: Optional[ListView] = None
self.status_panel: Optional[Static] = None
self._pipeline_running = False
# ------------------------------------------------------------------
# Layout
# ------------------------------------------------------------------
def compose(self) -> ComposeResult: # noqa: D401 - Textual compose hook
yield Header(show_clock=True)
with Container(id="app-shell"):
with Horizontal(id="command-row"):
self.command_input = Input(
placeholder='download-data "<url>" | merge-file | add-tag | add-file -storage local',
id="pipeline-input",
)
yield self.command_input
yield Button("Run", id="run-button", variant="primary")
self.status_panel = Static("Idle", id="status-panel")
yield self.status_panel
with Horizontal(id="content-row"):
with VerticalScroll(id="left-pane"):
yield Static("Pipeline Presets", classes="section-title")
self.preset_list = ListView(
*(PresetListItem(preset) for preset in PIPELINE_PRESETS),
id="preset-list",
)
yield self.preset_list
yield Static("Logs", classes="section-title")
self.log_output = TextArea(id="log-output", read_only=True)
yield self.log_output
yield Static("Workers", classes="section-title")
self.worker_table = DataTable(id="workers-table")
yield self.worker_table
with Vertical(id="right-pane"):
yield Static("Results", classes="section-title")
self.results_table = DataTable(id="results-table")
yield self.results_table
yield Static("Metadata", classes="section-title")
self.metadata_tree = Tree("Run a pipeline", id="metadata-tree")
yield self.metadata_tree
yield Footer()
def on_mount(self) -> None:
if self.results_table:
self.results_table.add_columns("Row", "Title", "Source", "File")
if self.worker_table:
self.worker_table.add_columns("ID", "Type", "Status", "Details")
if self.executor.worker_manager:
self.set_interval(2.0, self.refresh_workers)
self.refresh_workers()
if self.command_input:
self.command_input.focus()
# ------------------------------------------------------------------
# Actions
# ------------------------------------------------------------------
def action_focus_command(self) -> None:
if self.command_input:
self.command_input.focus()
def action_run_pipeline(self) -> None:
if self._pipeline_running:
self.notify("Pipeline already running", severity="warning", timeout=3)
return
if not self.command_input:
return
pipeline_text = self.command_input.value.strip()
if not pipeline_text:
self.notify("Enter a pipeline to run", severity="warning", timeout=3)
return
self._pipeline_running = True
self._set_status("Running…", level="info")
self._clear_log()
self._append_log_line(f"$ {pipeline_text}")
self._clear_results()
self._run_pipeline_background(pipeline_text)
def action_refresh_workers(self) -> None:
self.refresh_workers()
# ------------------------------------------------------------------
# Event handlers
# ------------------------------------------------------------------
def on_button_pressed(self, event: Button.Pressed) -> None:
if event.button.id == "run-button":
self.action_run_pipeline()
def on_input_submitted(self, event: Input.Submitted) -> None:
if event.input.id == "pipeline-input":
self.action_run_pipeline()
def on_list_view_selected(self, event: ListView.Selected) -> None:
if isinstance(event.item, PresetListItem) and self.command_input:
self.command_input.value = event.item.preset.pipeline
self.notify(f"Loaded preset: {event.item.preset.label}", timeout=2)
event.stop()
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
if not self.results_table or event.control is not self.results_table:
return
index = event.cursor_row
if 0 <= index < len(self.result_items):
self._display_metadata(self.result_items[index])
# ------------------------------------------------------------------
# Pipeline execution helpers
# ------------------------------------------------------------------
@work(exclusive=True, thread=True)
def _run_pipeline_background(self, pipeline_text: str) -> None:
run_result = self.executor.run_pipeline(pipeline_text, on_log=self._log_from_worker)
self.call_from_thread(self._on_pipeline_finished, run_result)
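# @work(thread=True) runs this method in a worker thread, so widget updates
# must be marshalled back to the UI thread via call_from_thread;
# _log_from_worker below follows the same pattern for per-stage log lines.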
def _on_pipeline_finished(self, run_result: PipelineRunResult) -> None:
self._pipeline_running = False
status_level = "success" if run_result.success else "error"
status_text = "Completed" if run_result.success else "Failed"
self._set_status(status_text, level=status_level)
if not run_result.success:
self.notify(run_result.error or "Pipeline failed", severity="error", timeout=6)
else:
self.notify("Pipeline completed", timeout=3)
if run_result.stdout.strip():
self._append_log_line("stdout:")
self._append_block(run_result.stdout)
if run_result.stderr.strip():
self._append_log_line("stderr:")
self._append_block(run_result.stderr)
for stage in run_result.stages:
summary = f"[{stage.status}] {stage.name} -> {len(stage.emitted)} item(s)"
if stage.error:
summary += f" ({stage.error})"
self._append_log_line(summary)
emitted = run_result.emitted
if isinstance(emitted, list):
self.result_items = emitted
elif emitted:
self.result_items = [emitted]
else:
self.result_items = []
self._populate_results_table()
self.refresh_workers()
def _log_from_worker(self, message: str) -> None:
self.call_from_thread(self._append_log_line, message)
# ------------------------------------------------------------------
# UI helpers
# ------------------------------------------------------------------
def _populate_results_table(self) -> None:
if not self.results_table:
return
self.results_table.clear()
if not self.result_items:
self.results_table.add_row("", "No results", "", "")
return
for idx, item in enumerate(self.result_items, start=1):
if isinstance(item, dict):
title = summarize_result(item)
source = item.get("source") or item.get("cmdlet_name") or item.get("cmdlet") or ""
file_path = item.get("file_path") or item.get("path") or ""
else:
title = str(item)
source = ""
file_path = ""
self.results_table.add_row(str(idx), title, source, file_path, key=str(idx - 1))
def _display_metadata(self, item: Any) -> None:
if not self.metadata_tree:
return
root = self.metadata_tree.root
root.label = "Metadata"
root.remove_children()
payload: Dict[str, Any]
if isinstance(item, dict):
file_path = item.get("file_path") or item.get("path")
if file_path:
payload = build_metadata_snapshot(Path(file_path))
else:
payload = item
else:
payload = {"value": str(item)}
self._populate_tree_node(root, payload)
root.expand_all()
def _populate_tree_node(self, node, data: Any) -> None:
if isinstance(data, dict):
for key, value in data.items():
child = node.add(f"[b]{key}[/b]")
self._populate_tree_node(child, value)
elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
for idx, value in enumerate(data):
child = node.add(f"[{idx}]")
self._populate_tree_node(child, value)
else:
node.add(str(data))
def _clear_log(self) -> None:
self.log_lines = []
if self.log_output:
self.log_output.text = ""
def _append_log_line(self, line: str) -> None:
self.log_lines.append(line)
if len(self.log_lines) > 500:
self.log_lines = self.log_lines[-500:]
if self.log_output:
self.log_output.text = "\n".join(self.log_lines)
def _append_block(self, text: str) -> None:
for line in text.strip().splitlines():
self._append_log_line(f" {line}")
def _clear_results(self) -> None:
self.result_items = []
if self.results_table:
self.results_table.clear()
if self.metadata_tree:
self.metadata_tree.root.label = "Awaiting results"
self.metadata_tree.root.remove_children()
def _set_status(self, message: str, *, level: str = "info") -> None:
if not self.status_panel:
return
for css in ("status-info", "status-success", "status-error"):
self.status_panel.remove_class(css)
css_class = f"status-{level if level in {'success', 'error'} else 'info'}"
self.status_panel.add_class(css_class)
self.status_panel.update(message)
def refresh_workers(self) -> None:
if not self.worker_table:
return
manager = self.executor.worker_manager
self.worker_table.clear()
if manager is None:
self.worker_table.add_row("", "", "", "Worker manager unavailable")
return
workers = manager.get_active_workers()
if not workers:
self.worker_table.add_row("", "", "", "No active workers")
return
for worker in workers:
worker_id = str(worker.get("worker_id") or worker.get("id") or "?")[:8]
worker_type = str(worker.get("worker_type") or worker.get("type") or "?")
status = str(worker.get("status") or worker.get("result") or "running")
details = worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
self.worker_table.add_row(worker_id, worker_type, status, str(details)[:80])
if __name__ == "__main__":
PipelineHubApp().run()

100
TUI/tui.tcss Normal file
View File

@@ -0,0 +1,100 @@
#app-shell {
width: 100%;
height: 100%;
padding: 1 2;
background: $surface;
layout: vertical;
}
#command-row {
width: 100%;
height: auto;
background: $boost;
padding: 1;
border: round $primary;
}
#pipeline-input {
width: 1fr;
min-height: 3;
padding: 0 1;
margin-right: 1;
background: $surface;
color: $text;
border: round $primary;
}
#pipeline-input:focus {
border: double $primary;
background: $surface;
}
#status-panel {
min-width: 20;
text-style: bold;
content-align: center middle;
padding: 0 1;
border: solid $panel-darken-1;
}
#content-row {
width: 100%;
height: 1fr;
}
#left-pane,
#right-pane {
width: 1fr;
height: 100%;
padding: 1;
background: $panel;
border: round $panel-darken-2;
}
#left-pane {
max-width: 48;
}
.section-title {
text-style: bold;
color: $text-muted;
margin-top: 1;
}
.preset-entry {
padding: 1;
border: tall $panel-darken-1;
margin-bottom: 1;
}
#log-output {
height: 16;
}
#workers-table {
height: auto;
}
#results-table {
height: 1fr;
}
#metadata-tree {
height: 1fr;
border: round $panel-darken-1;
}
.status-info {
background: $boost;
color: $text;
}
.status-success {
background: $success 20%;
color: $success;
}
.status-error {
background: $error 20%;
color: $error;
}

139
cmdlets/__init__.py Normal file
View File

@@ -0,0 +1,139 @@
from __future__ import annotations
from typing import Any, Callable, Dict, Iterable, Sequence
from importlib import import_module as _import_module
# A cmdlet is a callable taking (result, args, config) -> int
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
# Registry of command-name -> cmdlet function
REGISTRY: Dict[str, Cmdlet] = {}
def register(names: Iterable[str]):
"""Decorator to register a function under one or more command names.
Usage:
@register(["add-tag", "add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
for name in names:
REGISTRY[name.replace('_', '-').lower()] = fn
return fn
return _wrap
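# Illustrative only: a minimal cmdlet registered via the decorator above.
# "echo-result" is a hypothetical name, not a command shipped in this package.
#
#   @register(["echo-result"])
#   def _echo(result, args, config) -> int:
#       print(result, list(args))
#       return 0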
class AutoRegister:
"""Decorator that automatically registers a cmdlet function using CMDLET.aliases.
Usage:
CMDLET = Cmdlet(
name="delete-file",
aliases=["del", "del-file"],
...
)
@AutoRegister(CMDLET)
def _run(result, args, config) -> int:
...
Registers the cmdlet under:
- Its main name from CMDLET.name
- All aliases from CMDLET.aliases
This allows the help display to show: "cmd: delete-file | aliases: del, del-file"
"""
def __init__(self, cmdlet):
self.cmdlet = cmdlet
def __call__(self, fn: Cmdlet) -> Cmdlet:
"""Register fn for the main name and all aliases in cmdlet."""
normalized_name = None
# Register for main name first
if hasattr(self.cmdlet, 'name') and self.cmdlet.name:
normalized_name = self.cmdlet.name.replace('_', '-').lower()
REGISTRY[normalized_name] = fn
# Register for all aliases
if hasattr(self.cmdlet, 'aliases') and self.cmdlet.aliases:
for alias in self.cmdlet.aliases:
normalized_alias = alias.replace('_', '-').lower()
# Always register (aliases are separate from main name)
REGISTRY[normalized_alias] = fn
return fn
def get(cmd_name: str) -> Cmdlet | None:
return REGISTRY.get(cmd_name.replace('_', '-').lower())
def format_cmd_help(cmdlet) -> str:
"""Format a cmdlet for help display showing cmd:name and aliases.
Example output: "delete-file | aliases: del, del-file"
"""
if not hasattr(cmdlet, 'name'):
return str(cmdlet)
cmd_str = f"cmd: {cmdlet.name}"
if hasattr(cmdlet, 'aliases') and cmdlet.aliases:
aliases_str = ", ".join(cmdlet.aliases)
cmd_str += f" | aliases: {aliases_str}"
return cmd_str
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
):
mod_name = filename[:-3]
try:
module = _import_module(f".{mod_name}", __name__)
# Auto-register based on CMDLET object with exec function
# This allows cmdlets to be fully self-contained in the CMDLET object
if hasattr(module, 'CMDLET'):
cmdlet_obj = module.CMDLET
# Get the execution function from the CMDLET object
run_fn = getattr(cmdlet_obj, 'exec', None)
if callable(run_fn):
# Register main name
if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
normalized_name = cmdlet_obj.name.replace('_', '-').lower()
REGISTRY[normalized_name] = run_fn
# Register all aliases
if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
for alias in cmdlet_obj.aliases:
normalized_alias = alias.replace('_', '-').lower()
REGISTRY[normalized_alias] = run_fn
except Exception:
continue
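# For the auto-registration above, a cmdlet module is expected to expose a
# CMDLET object with a `name`, optional `aliases`, and a callable `exec`
# attribute (see AutoRegister's docstring). Illustrative shape only; the
# command name is a placeholder and the exact constructor may differ:
#
#   CMDLET = Cmdlet(name="example-cmd", aliases=["ex"], exec=_run)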
# Import root-level modules that also register cmdlets
# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py
# Use search-file -provider libgen, -provider soulseek, or -provider debrid instead
for _root_mod in ("select_cmdlet",):
try:
_import_module(_root_mod)
except Exception:
# Allow missing optional modules
continue
# Also import helper modules that register cmdlets
try:
import helper.alldebrid as _alldebrid
except Exception:
pass

1229
cmdlets/_shared.py Normal file

File diff suppressed because it is too large

910
cmdlets/add_file.py Normal file
View File

@@ -0,0 +1,910 @@
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
merge_sequences, extract_relationships, extract_duration
)
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file
# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
# Initialize file storage system
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
suffix = media_path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
def _resolve_media_kind(result: Any, media_path: Path) -> str:
if isinstance(result, models.PipeObject):
if getattr(result, 'media_kind', None):
return str(result.media_kind)
elif isinstance(result, dict):
media_kind = result.get('media_kind')
if media_kind:
return str(media_kind)
metadata = result.get('metadata')
if isinstance(metadata, dict) and metadata.get('media_kind'):
return str(metadata['media_kind'])
return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
# For local origin, try to read from local database first
if origin and origin.lower() == "local" and config:
try:
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
try:
db_root = get_local_storage_path(config)
except Exception:
db_root = None
if db_root:
try:
db = LocalLibraryDB(Path(db_root))
try:
# Get tags and metadata from database
tags = db.get_tags(media_path) or []
metadata = db.get_metadata(media_path) or {}
known_urls = metadata.get("known_urls") or []
file_hash = metadata.get("hash")
if tags or known_urls or file_hash:
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
return None, file_hash, tags, known_urls
finally:
db.close()
except Exception as exc:
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
except Exception:
pass
# Fall back to sidecar file lookup
try:
sidecar_path = find_sidecar(media_path)
except Exception:
sidecar_path = None
if not sidecar_path or not sidecar_path.exists():
return None, None, [], []
try:
hash_value, tags, known_urls = read_sidecar(sidecar_path)
return sidecar_path, hash_value, tags or [], known_urls or []
except Exception as exc:
log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
return sidecar_path, None, [], []
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
candidate = None
if isinstance(result, models.PipeObject):
candidate = result.file_hash
elif isinstance(result, dict):
candidate = result.get('file_hash') or result.get('hash')
candidate = candidate or fallback_hash
if candidate:
return str(candidate)
try:
return sha256_file(file_path)
except Exception as exc:
log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
return None
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tags'),
media_path.parent / (media_path.name + '.tags.txt'),
]
targets.extend(extra_paths)
for target in targets:
if not target:
continue
try:
path_obj = Path(target)
if path_obj.exists():
path_obj.unlink()
except Exception:
continue
def _persist_local_metadata(
library_root: Path,
dest_path: Path,
tags: list[str],
known_urls: list[str],
file_hash: Optional[str],
relationships: Optional[Dict[str, Any]],
duration: Optional[float],
media_kind: str,
) -> None:
payload = {
'hash': file_hash,
'known_urls': known_urls,
'relationships': relationships or [],
'duration': duration,
'size': None,
'ext': dest_path.suffix.lower(),
'media_type': media_kind,
'media_kind': media_kind,
}
try:
payload['size'] = dest_path.stat().st_size
except OSError:
payload['size'] = None
try:
debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
db_path = Path(library_root) / ".downlow_library.db"
debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
with LocalLibraryDB(library_root) as db:
# Save metadata FIRST to ensure file entry is created in DB
if any(payload.values()):
debug(f"[_persist_local_metadata] Saving metadata payload first")
try:
db.save_metadata(dest_path, payload)
debug(f"[_persist_local_metadata] ✅ Metadata saved")
except Exception as meta_exc:
log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
raise
# Save tags to DB synchronously in same transaction
# For local storage, DB is the primary source of truth
if tags:
try:
debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
db.save_tags(dest_path, tags)
debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
except Exception as tag_exc:
log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
raise
# NOTE: Sidecar files are intentionally NOT created for local storage
# Local storage uses database as primary source, not sidecar files
debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
except Exception as exc:
log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
import traceback
log(traceback.format_exc(), file=sys.stderr)
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
"""Transfer a file to local storage and return (exit_code, destination_path).
Args:
media_path: Path to source file
destination_root: Destination directory
result: Result object with metadata
config: Configuration dictionary
Returns:
Tuple of (exit_code, destination_path)
- exit_code: 0 on success, 1 on failure
- destination_path: Path to moved file on success, None on failure
"""
destination_root = destination_root.expanduser()
try:
destination_root.mkdir(parents=True, exist_ok=True)
except Exception as exc:
log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
return 1, None
tags_from_result = extract_tags_from_result(result)
urls_from_result = extract_known_urls_from_result(result)
# Get origin from result if available
result_origin = None
if hasattr(result, "origin"):
result_origin = result.origin
elif isinstance(result, dict):
result_origin = result.get("origin") or result.get("source")
sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
# Normalize all title tags to use spaces instead of underscores BEFORE merging
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
def normalize_title_tag(tag: str) -> str:
"""Normalize a title tag by replacing underscores with spaces."""
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
# Merge tags carefully: if URL has title tag, don't include sidecar title tags
# This prevents duplicate title: tags when URL provides a title
has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
if has_url_title:
# URL has a title, filter out any sidecar title tags to avoid duplication
sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
else:
# No URL title, use all sidecar tags
merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
relationships = extract_relationships(result)
duration = extract_duration(result)
try:
dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
except Exception as exc:
log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
return 1, None
dest_path = Path(dest_file)
file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
media_kind = _resolve_media_kind(result, dest_path)
# Ensure only ONE title tag that matches the actual filename
# Remove all existing title tags and add one based on the saved filename
merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
filename_title = dest_path.stem.replace("_", " ").strip()
if filename_title:
merged_tags_no_titles.insert(0, f"title:{filename_title}")
_persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
_cleanup_sidecar_files(media_path, sidecar_path)
debug(f"✅ Moved to local library: {dest_path}")
return 0, dest_path
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Upload/copy a file to specified location.
Returns 0 on success, non-zero on failure.
"""
import sys # For stderr output
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
debug("Starting add-file cmdlet")
# Handle list of results (from piped commands that emit multiple items)
if isinstance(result, list):
debug(f"Processing {len(result)} piped files")
success_count = 0
for item in result:
exit_code = _run(item, _args, config)
if exit_code == 0:
success_count += 1
return 0 if success_count > 0 else 1
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
location: Optional[str] = None
provider_name: Optional[str] = None
delete_after_upload = False
# Check if -path argument was provided to use direct file path instead of piped result
path_arg = parsed.get("path")
if path_arg:
# Create a pseudo-result object from the file path
media_path = Path(str(path_arg).strip())
if not media_path.exists():
log(f"❌ File not found: {media_path}")
return 1
# Create result dict with the file path and origin 'wild' for direct path inputs
result = {"target": str(media_path), "origin": "wild"}
log(f"Using direct file path: {media_path}")
# Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
location = parsed.get("storage")
if location:
location = str(location).lower().strip()
# Get file provider from parsed args
provider_name = parsed.get("provider")
if provider_name:
provider_name = str(provider_name).lower().strip()
# Check for delete flag (presence in parsed dict means it was provided)
delete_after_upload = "delete" in parsed
# Either storage or provider must be specified, but not both
if location is None and provider_name is None:
log("Either -storage or -provider must be specified")
log(" -storage options: 'hydrus', 'local', or a directory path")
log(" -provider options: '0x0'")
return 1
if location is not None and provider_name is not None:
log("❌ Cannot specify both -storage and -provider")
return 1
# Validate location (storage backends)
is_valid_location = False
if location is not None:
valid_locations = {'hydrus', 'local'}
is_valid_location = location in valid_locations
is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
if location is not None and not (is_valid_location or is_local_path):
log(f"❌ Invalid location: {location}")
log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
return 1
# Extract tags/known URLs from pipeline objects if available
pipe_object_tags = extract_tags_from_result(result)
if pipe_object_tags:
log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
pipe_known_urls = extract_known_urls_from_result(result)
# Resolve media path: get from piped result
# Support both object attributes (getattr) and dict keys (get)
target = None
origin = None
# Try object attributes first
if hasattr(result, "target"):
target = result.target
elif hasattr(result, "path"):
target = result.path
elif hasattr(result, "file_path"):
target = result.file_path
# Try dict keys if object attributes failed
elif isinstance(result, dict):
target = (result.get("target") or result.get("path") or result.get("file_path") or
result.get("__file_path") or result.get("__path") or result.get("__target"))
# Get origin to detect Hydrus files
if hasattr(result, "origin"):
origin = result.origin
elif hasattr(result, "source"):
origin = result.source
elif isinstance(result, dict):
origin = result.get("origin") or result.get("source") or result.get("__source")
# Convert target to string and preserve URLs (don't let Path() mangle them)
target_str = str(target) if target else None
# Check if this is a playlist item that needs to be downloaded first
is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
# This is a playlist item URL - we need to download it first
log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
# Extract item number if available
item_num = None
if "__action" in result and result["__action"].startswith("playlist-item:"):
item_num = result["__action"].split(":")[1]
elif "index" in result:
item_num = result["index"]
# Call download-data to download this specific item
# Pass the item number so it knows which track to download
from cmdlets import download_data as dl_module
# Capture emissions from download-data to process them
captured_results = []
original_emit = ctx.emit
def capture_emit(obj):
captured_results.append(obj)
# Also emit to original so user sees progress/output if needed
# But since add-file is usually terminal, we might not need to
# original_emit(obj)
# Temporarily hook the pipeline emit function
ctx.emit = capture_emit
try:
if item_num:
# Pass a marker dict to tell download-data which item to get
download_result = dl_module._run(
{
"__playlist_url": str(target_str),
"__playlist_item": int(item_num)
},
[],
config
)
else:
# Fallback: just download the URL (will show all items)
download_result = dl_module._run(None, [str(target_str)], config)
finally:
# Restore original emit function
ctx.emit = original_emit
if download_result != 0:
log(f"❌ Failed to download playlist item", file=sys.stderr)
return 1
log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
# Process the downloaded files recursively
success_count = 0
for res in captured_results:
# Recursively call add-file with the downloaded result
# This ensures tags and metadata from download-data are applied
if _run(res, _args, config) == 0:
success_count += 1
return 0 if success_count > 0 else 1
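# Hedged example of the playlist-item branch above (URL and index are placeholders):
#   download-data "https://example.com/playlist" | @2 | add-file -storage hydrus
# The piped dict carries __source == "playlist-probe" and __action == "playlist-item:2",
# so add-file downloads item 2 via download-data and then re-runs itself on each result.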
# Determine media_path from result
media_path: Optional[Path] = None
is_hydrus_file = origin and origin.lower() == "hydrus"
if target_str:
# Check if it's a URL or Hydrus hash
if target_str.lower().startswith(("http://", "https://")):
media_path = None # Will handle as Hydrus file below
elif not is_hydrus_file:
# Only treat as local path if not a Hydrus file
media_path = Path(target_str)
if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
# Check if this is a format object from download-data
if isinstance(result, dict) and result.get('format_id') is not None:
log("❌ Format object received, but add-file expects a downloaded file")
log(f" Tip: Use @N to automatically select and download the format")
log(f" Streamlined workflow:")
log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
log(f" (The @N automatically expands to download-data \"URL\" -item N)")
return 1
log("❌ File not found: provide a piped file result or local file path")
return 1
# Check if this is a Hydrus file - fetch the actual file path from Hydrus
if is_hydrus_file and target_str:
log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
try:
from helper import hydrus
# Get the Hydrus client
client = hydrus.get_client(config)
if not client:
log(f"❌ Hydrus client unavailable", file=sys.stderr)
return 1
# target_str is the hash - need to get the actual file path from Hydrus
file_hash = target_str
# Call the /get_files/file_path endpoint to get the actual file path
response = client.get_file_path(file_hash)
if not response or not isinstance(response, dict):
log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
return 1
file_path_str = response.get("path")
if not file_path_str:
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
return 1
media_path = Path(file_path_str)
if not media_path.exists():
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
return 1
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
except Exception as exc:
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
import traceback
log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
return 1
# Generic URL handler: if target is a URL and we haven't resolved a local path yet
# This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
from cmdlets import download_data as dl_module
dl_args = []
if location:
dl_args.extend(["-storage", location])
# Map provider 0x0 to storage 0x0 for download-data
if provider_name == "0x0":
dl_args.extend(["-storage", "0x0"])
return dl_module._run(result, dl_args, config)
if media_path is None:
log("File path could not be resolved")
return 1
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return 1
# Validate file type - only accept Hydrus-supported files
file_extension = media_path.suffix.lower()
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
log(f"Hydrus supports the following file types:", file=sys.stderr)
# Display by category from hydrus_wrapper
for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
log(f"Skipping this file: {media_path.name}", file=sys.stderr)
return 1
# Handle based on provider or storage
if provider_name is not None:
# Use file provider (e.g., 0x0.st)
from helper.search_provider import get_file_provider
log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
try:
file_provider = get_file_provider(provider_name, config)
if file_provider is None:
log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
return 1
hoster_url = file_provider.upload(media_path)
log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
# Associate the URL with the file in Hydrus if possible
current_hash = locals().get('file_hash')
if not current_hash:
current_hash = _resolve_file_hash(result, None, media_path)
if current_hash:
try:
client = hydrus_wrapper.get_client(config)
if client:
client.associate_url(current_hash, hoster_url)
log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
except Exception as exc:
log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
return 1
if delete_after_upload:
try:
media_path.unlink()
_cleanup_sidecar_files(media_path)
log(f"✅ Deleted file and sidecar", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
return 0
# Handle storage-based operations (location is not None here)
valid_locations = {'hydrus', 'local'}
is_valid_location = location in valid_locations
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
if not (is_valid_location or is_local_path):
log(f"❌ Invalid location: {location}")
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
return 1
if location == 'local':
try:
from config import get_local_storage_path
resolved_dir = get_local_storage_path(config)
except Exception:
resolved_dir = None
if not resolved_dir:
resolved_dir = config.get("LocalDir") or config.get("OutputDir")
if not resolved_dir:
log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
return 1
log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
# After successful local transfer, emit result for pipeline continuation
# This allows downstream commands like add-tags to chain automatically
if exit_code == 0 and dest_path:
# Extract tags from result for emission
emit_tags = extract_tags_from_result(result)
file_hash = _resolve_file_hash(result, None, dest_path)
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or dest_path.name
# Always emit result for local files, even if no tags
# This allows @N selection and piping to downstream commands
result_dict = create_pipe_object_result(
source='local',
identifier=str(dest_path),
file_path=str(dest_path),
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
tags=emit_tags if emit_tags else [],
target=str(dest_path) # Explicit target for get-file
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these local file results, not format objects
ctx.set_current_stage_table(None)
return exit_code
elif is_local_path:
try:
destination_root = Path(location)
except Exception as exc:
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
return 1
log(f"Moving to local path: {destination_root}", file=sys.stderr)
exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
# After successful local transfer, emit result for pipeline continuation
if exit_code == 0 and dest_path:
# Extract tags from result for emission
emit_tags = extract_tags_from_result(result)
file_hash = _resolve_file_hash(result, None, dest_path)
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or dest_path.name
# Always emit result for local files, even if no tags
# This allows @N selection and piping to downstream commands
result_dict = create_pipe_object_result(
source='local',
identifier=str(dest_path),
file_path=str(dest_path),
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
tags=emit_tags if emit_tags else [],
target=str(dest_path) # Explicit target for get-file
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these local file results, not format objects
ctx.set_current_stage_table(None)
return exit_code
# location == 'hydrus'
# Compute file hash to check if already in Hydrus
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
try:
file_hash = sha256_file(media_path)
except Exception as exc:
log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
return 1
log(f"File hash: {file_hash}", file=sys.stderr)
# Read sidecar tags and known URLs first (for tagging)
sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
if sidecar_path:
log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
if sidecar_tags:
log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
if sidecar_urls:
log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
else:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
# Normalize all title tags to use spaces instead of underscores BEFORE merging
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
def normalize_title_tag(tag: str) -> str:
"""Normalize a title tag by replacing underscores with spaces."""
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
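# Illustrative behaviour of normalize_title_tag (a sketch, values are placeholders):
#   normalize_title_tag("title:Radiohead_-_Creep")  -> "title:Radiohead - Creep"
#   normalize_title_tag("artist:Some_Band")         -> "artist:Some_Band"  (non-title tags pass through)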
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
# Merge tags from PipeObject with tags from sidecar
# NOTE: Remove ALL existing title tags and use only filename-based title
# The filename is the source of truth for the title
tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
if not str(t).strip().lower().startswith("title:")]
# Ensure ONE title tag based on the actual filename
filename_title = media_path.stem.replace("_", " ").strip()
if filename_title:
tags = [f"title:{filename_title}"] + tags_without_titles
else:
tags = tags_without_titles
known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
if pipe_object_tags:
log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
# Embed metadata in the file only for non-Hydrus destinations (Hydrus stores tags separately,
# so the file itself is left untouched). The local-storage branches above return early, so this
# guard mainly documents that Hydrus uploads skip FFmpeg embedding.
if location != 'hydrus':
try:
if tags:
# Determine file kind from extension
file_kind = ''
sfx = media_path.suffix.lower()
if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_kind = 'audio'
elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_kind = 'video'
if embed_metadata_in_file(media_path, tags, file_kind):
log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
else:
log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
except Exception as exc:
log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
else:
log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
# Use FileStorage backend to upload to Hydrus
try:
file_hash = storage["hydrus"].upload(
media_path,
config=config,
tags=tags,
)
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
return 1
# Associate known URLs in Hydrus metadata
url_count = 0
if known_urls:
try:
client = hydrus_wrapper.get_client(config)
if client:
for url in known_urls:
u = str(url or "").strip()
if not u:
continue
try:
client.associate_url(file_hash, u)
except Exception as exc:
log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
continue
url_count += 1
except Exception as exc:
log(f"Failed to associate URLs: {exc}", file=sys.stderr)
if url_count:
log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
else:
log(f"No URLs to associate", file=sys.stderr)
_cleanup_sidecar_files(media_path, sidecar_path)
# Update in-memory result for downstream pipes
try:
# Only update piped result objects; direct -path usage may have a dummy result
setattr(result, "hash_hex", file_hash)
# Preserve media_kind for downstream commands (e.g., open)
if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
# Try to infer media_kind from file extension or keep existing
suffix = media_path.suffix.lower()
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
setattr(result, "media_kind", "document")
if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
cols = list(getattr(result, "columns"))
if ("Hash", file_hash) not in cols:
cols.append(("Hash", file_hash))
setattr(result, "columns", cols)
except Exception:
pass
# If -delete flag is set, delete the file and .tags after successful upload
if delete_after_upload:
log(f"Deleting local files (as requested)...", file=sys.stderr)
try:
media_path.unlink()
log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
except OSError as exc:
log(f"Failed to delete file: {exc}", file=sys.stderr)
# Delete .tags sidecar if it exists
if sidecar_path is not None:
try:
sidecar_path.unlink()
log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
except OSError as exc:
log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
# Emit result for Hydrus uploads so downstream commands know about it
if location == 'hydrus':
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or media_path.name
result_dict = create_pipe_object_result(
source='hydrus',
identifier=file_hash,
file_path=f"hydrus:{file_hash}",
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
extra={
'storage_source': 'hydrus',
'hydrus_hash': file_hash,
'tags': tags,
'known_urls': known_urls,
}
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these Hydrus file results, not format objects
ctx.set_current_stage_table(None)
return 0
CMDLET = Cmdlet(
name="add-file",
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
args=[
CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
SharedArgs.STORAGE, # For hydrus, local, or directory paths
CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
],
details=[
"- Storage location options (use -storage):",
" hydrus: Upload to Hydrus database with metadata tagging",
" local: Copy file to local directory",
" <path>: Copy file to specified directory",
"- File provider options (use -provider):",
" 0x0: Upload to 0x0.st for temporary hosting with public URL",
"- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
"- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
"- When using file provider: uploads to service, adds URL to sidecar",
"- When copying locally: copies file with original metadata preserved",
"- Use -delete flag to automatically delete the file and .tags after successful operation.",
],
)
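# Hedged usage sketches (URLs and paths are placeholders, not real endpoints):
#   download-data "URL" | @1 | add-file -storage hydrus -delete
#   search-file -provider openlibrary "dune" | @1 | add-file -storage local
#   add-file -path C:\Music\track.mp3 -provider 0x0
# The first uploads the downloaded file to Hydrus and removes the local copy, the second
# copies a downloaded result into the configured local library, and the third pushes a
# local file to the 0x0.st host and records the returned URL where a hash can be resolved.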

84
cmdlets/add_note.py Normal file
View File

@@ -0,0 +1,84 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="add-note",
summary="Add or set a note on a Hydrus file.",
usage="add-note [-hash <sha256>] <name> <text>",
args=[
CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
],
details=[
"- Notes are stored in the 'my notes' service by default.",
],
)
@register(["add-note", "set-note", "add_note"]) # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
name = parsed.get("name")
text_parts = parsed.get("text")
if not name:
log("Requires a note name")
return 1
name = str(name).strip()
if isinstance(text_parts, list):
text = " ".join(text_parts).strip()
else:
text = str(text_parts or "").strip()
if not text:
log("Empty note text")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.set_notes(hash_hex, {name: text}, service_name)
except Exception as exc:
log(f"Hydrus add-note failed: {exc}")
return 1
ctx.emit(f"Added note '{name}' ({len(text)} chars)")
return 0
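# Hedged usage sketch (assumes a piped result carrying a Hydrus hash; note name/text are placeholders):
#   @1 | add-note comment "needs better source"
# The note is stored under the 'my notes' service for the selected file's hash.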

264
cmdlets/add_relationship.py Normal file
View File

@@ -0,0 +1,264 @@
"""Add file relationships in Hydrus based on relationship tags in sidecar."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
import re
from pathlib import Path
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.local_library import read_sidecar, find_sidecar
CMDLET = Cmdlet(
name="add-relationship",
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="add-relationship OR add-relationship -path <file>",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
],
details=[
"- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
"- Calls Hydrus API to associate the hashes as relationships",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
"- Works with piped file results or -path argument for direct invocation",
],
)
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
"""Normalize a hash hex string to lowercase 64-char format."""
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
return normalized
return None
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
"""
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
matches = re.findall(pattern, tag_value)
for rel_type, hash_value in matches:
normalized = _normalise_hash_hex(hash_value)
if normalized:
if rel_type not in result:
result[rel_type] = []
result[rel_type].append(normalized)
return result
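# Illustrative parse (a sketch; AAAA.../BBBB... stand in for full 64-char hex digests):
#   "relationship: hash(king)AAAA...,hash(alt)BBBB..."  ->  {"king": ["aaaa..."], "alt": ["bbbb..."]}
# The regex matches the bare 64-hex digest after "hash(type)"; _normalise_hash_hex lower-cases it,
# and anything that is not exactly 64 hex characters is dropped.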
@register(["add-relationship", "add-rel"]) # primary name and alias
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
Two modes of operation:
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
Returns 0 on success, non-zero on failure.
"""
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
if parsed:
# Get the first arg value (e.g., -path)
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
if first_arg_name and first_arg_name in parsed:
arg_value = parsed[first_arg_name]
try:
arg_path = Path(str(arg_value)).expanduser()
except Exception:
arg_path = Path(str(arg_value))
# Get Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Check if we're in pipeline mode (have a hash) or file mode
file_hash = getattr(result, "hash_hex", None)
# PIPELINE MODE: Track relationships across multiple items
if file_hash:
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Load or initialize king hash from pipeline context
try:
king_hash = ctx.load_value("relationship_king")
except Exception:
king_hash = None
# If this is the first item, make it the king
if not king_hash:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
return 0 # First item just becomes the king, no relationships yet
except Exception:
pass
# If we already have a king and this is a different hash, link them
if king_hash and king_hash != file_hash:
try:
client.set_relationship(file_hash, king_hash, "alt")
log(
f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
return 0
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
return 0
# FILE MODE: Read relationships from sidecar
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
# Validate local file
if str(media_path).lower().startswith(("http://", "https://")):
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
return 1
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}", file=sys.stderr)
return 1
# Build Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# Read sidecar to find relationship tags
sidecar_path = find_sidecar(media_path)
if sidecar_path is None:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
return 1
try:
_, tags, _ = read_sidecar(sidecar_path)
except Exception as exc:
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
return 0 # Not an error, just nothing to do
# Get the file hash from result (should have been set by add-file)
file_hash = getattr(result, "hash_hex", None)
if not file_hash:
log("File hash not available (run add-file first)", file=sys.stderr)
return 1
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Parse relationships from tags and apply them
success_count = 0
error_count = 0
for rel_tag in relationship_tags:
try:
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
# Parse relationships
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
# Set the relationships in Hydrus
for rel_type, related_hashes in rels.items():
if not related_hashes:
continue
for related_hash in related_hashes:
# Don't set relationship between hash and itself
if file_hash == related_hash:
continue
try:
client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
)
success_count += 1
except Exception as exc:
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
error_count += 1
except Exception as exc:
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
log(f"No relationships to set", file=sys.stderr)
return 0 # Success with nothing to do
else:
log(f"Failed with {error_count} error(s)", file=sys.stderr)
return 1
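# Hedged pipeline sketch (selection syntax as described in the docstring above):
# piping several results through add-relationship makes the first one the king and links each
# later hash to it as an "alt"; single-file sidecar mode is instead driven by a tag such as
#   relationship: hash(king)<HASH>,hash(alt)<HASH>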

276
cmdlets/add_tags.py Normal file
View File

@@ -0,0 +1,276 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
import json
from pathlib import Path
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from helper import hydrus as hydrus_wrapper
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, has_sidecar, LocalLibraryDB
from metadata import rename_by_metadata
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args
from config import get_local_storage_path
CMDLET = Cmdlet(
name="add-tags",
summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
usage="add-tags [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
args=[
CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
CmdletArg("tags", type="string", required=True, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax.", variadic=True),
],
details=[
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
"- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
"- With a Hydrus hash, tags are sent to the 'my tags' service.",
"- Multiple tags can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
],
)
@register(["add-tag", "add-tags"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add tags to a file with smart filtering for pipeline results."""
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
# Check for --all flag
include_temp = parsed.get("all", False)
# Normalize input to list
results = normalize_result_input(result)
# Filter by temp status (unless --all is set)
if not include_temp:
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tags from arguments
raw_tags = parsed.get("tags", [])
if isinstance(raw_tags, str):
raw_tags = [raw_tags]
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
l = l.strip()
if l:
raw_tags.append(f"{{{l}}}")
# Parse and expand tags
tags_to_add = parse_tag_arguments(raw_tags)
tags_to_add = expand_tag_groups(tags_to_add)
# Get other flags
hash_override = normalize_hash(parsed.get("hash"))
duplicate_arg = parsed.get("duplicate")
# If no tags provided (and no list), write sidecar files with embedded tags
# Note: Since 'tags' is required=True in CMDLET, this block might be unreachable via CLI
# unless called programmatically or if required check is bypassed.
if not tags_to_add and not duplicate_arg:
# Write sidecar files with the tags that are already in the result dicts
sidecar_count = 0
for res in results:
# Handle both dict and PipeObject formats
file_path = None
tags = []
file_hash = ""
if isinstance(res, models.PipeObject):
file_path = res.file_path
tags = res.extra.get('tags', [])
file_hash = res.file_hash or ""
elif isinstance(res, dict):
file_path = res.get('file_path')
tags = res.get('tags', []) # Check both tags and extra['tags']
if not tags and 'extra' in res:
tags = res['extra'].get('tags', [])
file_hash = res.get('file_hash', "")
if not file_path:
log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr)
ctx.emit(res)
continue
if tags:
# Write sidecar file for this file with its tags
try:
sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
ctx.emit(res)
if sidecar_count > 0:
log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr)
else:
log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr)
return 0
# Tags ARE provided - append them to each result and write sidecar files or add to Hydrus
sidecar_count = 0
for res in results:
# Handle both dict and PipeObject formats
file_path = None
existing_tags = []
file_hash = ""
storage_source = None
hydrus_hash = None
if isinstance(res, models.PipeObject):
file_path = res.file_path
existing_tags = res.extra.get('tags', [])
file_hash = res.file_hash or ""
storage_source = res.extra.get('storage_source') or res.extra.get('source')
hydrus_hash = res.extra.get('hydrus_hash')
elif isinstance(res, dict):
file_path = res.get('file_path') or res.get('path')
existing_tags = res.get('tags', [])
if not existing_tags and 'extra' in res:
existing_tags = res['extra'].get('tags', [])
file_hash = res.get('file_hash', "")
storage_source = res.get('storage_source') or res.get('source') or res.get('origin')
if not storage_source and 'extra' in res:
storage_source = res['extra'].get('storage_source') or res['extra'].get('source')
# For Hydrus results from search-file, look for hash, hash_hex, or target (all contain the hash)
hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex')
if not hydrus_hash and 'extra' in res:
hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex')
else:
ctx.emit(res)
continue
# Apply hash override if provided
if hash_override:
hydrus_hash = hash_override
# If we have a hash override, we treat it as a Hydrus target
storage_source = "hydrus"
if not file_path and not hydrus_hash:
log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr)
ctx.emit(res)
continue
# Handle -duplicate logic (copy existing tags to new namespaces)
if duplicate_arg:
# Parse duplicate format: source:target1,target2 or source,target1,target2
parts = duplicate_arg.split(':')
source_ns = ""
targets = []
if len(parts) > 1:
# Explicit format: source:target1,target2
source_ns = parts[0]
targets = parts[1].split(',')
else:
# Inferred format: source,target1,target2
parts = duplicate_arg.split(',')
if len(parts) > 1:
source_ns = parts[0]
targets = parts[1:]
if source_ns and targets:
# Find tags in source namespace
source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')]
for t in source_tags:
value = t.split(':', 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag not in existing_tags and new_tag not in tags_to_add:
tags_to_add.append(new_tag)
# Merge new tags with existing tags, handling namespace overwrites
# When adding a tag like "namespace:value", remove any existing "namespace:*" tags
for new_tag in tags_to_add:
# Check if this is a namespaced tag (format: "namespace:value")
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Remove any existing tags with the same namespace
existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))]
# Add the new tag if not already present
if new_tag not in existing_tags:
existing_tags.append(new_tag)
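# Worked sketch of the two steps above (values are placeholders):
#   existing_tags = ["title:Dune", "artist:Old Name"], tags_to_add = ["artist:New Name"]
#   -duplicate title:album appends "album:Dune" to tags_to_add
#   after the namespace merge: ["title:Dune", "artist:New Name", "album:Dune"]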
# Update the result's tags
if isinstance(res, models.PipeObject):
res.extra['tags'] = existing_tags
elif isinstance(res, dict):
res['tags'] = existing_tags
# Determine where to add tags: Hydrus, local DB, or sidecar
if storage_source and storage_source.lower() == 'hydrus':
# Add tags to Hydrus using the API
target_hash = hydrus_hash or file_hash
if target_hash:
try:
log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
hydrus_client = hydrus_wrapper.get_client(config)
hydrus_client.add_tags(target_hash, existing_tags, "my tags")
log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
else:
log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
elif storage_source and storage_source.lower() == 'local':
# For local storage, save directly to DB (no sidecar needed)
if file_path:
library_root = get_local_storage_path(config)
if library_root:
try:
with LocalLibraryDB(library_root) as db:
db.save_tags(Path(file_path), existing_tags)
log(f"[add_tags] Saved {len(existing_tags)} tag(s) to local DB", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr)
else:
log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr)
else:
log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr)
else:
# For other storage types or unknown sources, write sidecar file if we have a file path
if file_path:
try:
sidecar_path = write_sidecar(Path(file_path), existing_tags, [], file_hash)
log(f"[add_tags] Wrote {len(existing_tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
# Emit the modified result
ctx.emit(res)
log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr)
return 0
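# Hedged usage sketches (tag values and list names are placeholders):
#   @1 | add-tags "artist:New Name" "album:Dune"
#   @1 | add-tags -list philosophy,occult
#   @1 | add-tags -duplicate title:album,artist
# Hydrus-backed results go through the API, local-library results are written straight to the
# library DB, and anything else falls back to a .tags sidecar next to the file.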

78
cmdlets/add_url.py Normal file
View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="add-url",
summary="Associate a URL with a Hydrus file.",
usage="add-url [-hash <sha256>] <url>",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("url", required=True, description="The URL to associate with the file."),
],
details=[
"- Adds the URL to the Hydrus file's known URL list.",
],
)
@register(["add-url", "ass-url", "associate-url", "add_url"]) # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
url = parsed.get("url")
if not url:
log("Requires a URL argument")
return 1
url = str(url).strip()
if not url:
log("Requires a non-empty URL")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
client.associate_url(hash_hex, url)
except Exception as exc:
log(f"Hydrus add-url failed: {exc}")
return 1
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
ctx.emit(f"Associated URL with {preview}: {url}")
return 0
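# Hedged usage sketch (URL is a placeholder):
#   @1 | add-url "https://example.com/source-page"
# The URL is appended to the selected Hydrus file's known-URL list via associate_url.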

148
cmdlets/adjective.py Normal file
View File

@@ -0,0 +1,148 @@
import json
import os
import sys
from typing import List, Dict, Any, Optional, Sequence
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
import pipeline as ctx
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "helper", "adjective.json")
def _load_adjectives() -> Dict[str, List[str]]:
try:
if os.path.exists(ADJECTIVE_FILE):
with open(ADJECTIVE_FILE, 'r', encoding='utf-8') as f:
return json.load(f)
except Exception as e:
log(f"Error loading adjectives: {e}", file=sys.stderr)
return {}
def _save_adjectives(data: Dict[str, List[str]]) -> bool:
try:
with open(ADJECTIVE_FILE, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
return True
except Exception as e:
log(f"Error saving adjectives: {e}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
data = _load_adjectives()
# Parse arguments manually first to handle positional args
# We expect: .adjective [category] [tag] [-add] [-delete]
# If no args, list categories
if not args:
table = ResultTable("Adjective Categories")
for i, (category, tags) in enumerate(data.items()):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Category", category)
row.add_column("Tag Amount", str(len(tags)))
# Selection expands to: .adjective "Category Name"
table.set_row_selection_args(i, [category])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, list(data.keys()))
ctx.set_current_stage_table(table)
print(table)
return 0
# We have args. First arg is likely category.
category = args[0]
# Check if we are adding a new category (implicit if it doesn't exist)
if category not in data:
# If only category provided, create it
if len(args) == 1:
data[category] = []
_save_adjectives(data)
log(f"Created new category: {category}")
# If more args, we might be trying to add to a non-existent category
elif "-add" in args:
data[category] = []
# Continue to add logic
# Handle operations within category
remaining_args = list(args[1:])
# Check for -add flag
if "-add" in remaining_args:
# .adjective category -add tag
# or .adjective category tag -add
add_idx = remaining_args.index("-add")
# Tag could be before or after
tag = None
if add_idx + 1 < len(remaining_args):
tag = remaining_args[add_idx + 1]
elif add_idx > 0:
tag = remaining_args[add_idx - 1]
if tag:
if tag not in data[category]:
data[category].append(tag)
_save_adjectives(data)
log(f"Added '{tag}' to '{category}'")
else:
log(f"Tag '{tag}' already exists in '{category}'")
else:
log("Error: No tag specified to add")
return 1
# Check for -delete flag
elif "-delete" in remaining_args:
# .adjective category -delete tag
# or .adjective category tag -delete
del_idx = remaining_args.index("-delete")
tag = None
if del_idx + 1 < len(remaining_args):
tag = remaining_args[del_idx + 1]
elif del_idx > 0:
tag = remaining_args[del_idx - 1]
if tag:
if tag in data.get(category, []):
data[category].remove(tag)
_save_adjectives(data)
log(f"Deleted '{tag}' from '{category}'")
else:
log(f"Tag '{tag}' not found in '{category}'")
else:
log("Error: No tag specified to delete")
return 1
# List tags in category (Default action if no flags or after modification)
tags = data.get(category, [])
table = ResultTable(f"Tags in '{category}'")
for i, tag in enumerate(tags):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Tag", tag)
# Selection expands to: .adjective "Category" "Tag"
# This allows typing @N -delete to delete it
table.set_row_selection_args(i, [category, tag])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, tags)
ctx.set_current_stage_table(table)
print(table)
return 0
CMDLET = Cmdlet(
name=".adjective",
aliases=["adj"],
summary="Manage adjective categories and tags",
usage=".adjective [category] [-add tag] [-delete tag]",
args=[
CmdletArg(name="category", type="string", description="Category name", required=False),
CmdletArg(name="tag", type="string", description="Tag name", required=False),
CmdletArg(name="add", type="flag", description="Add tag"),
CmdletArg(name="delete", type="flag", description="Delete tag"),
],
exec=_run
)
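# Hedged usage sketches (category and tag names are placeholders):
#   .adjective                          # list categories
#   .adjective philosophy               # list (or create) the category and show its tags
#   .adjective philosophy -add stoicism
#   .adjective philosophy -delete stoicism
# Rows in the printed tables expand via @N back into '.adjective <category> [<tag>]' selections.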

View File

@@ -0,0 +1,153 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from helper.logger import log
from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
CMDLET = Cmdlet(
name="check-file-status",
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
usage="check-file-status [-hash <sha256>]",
args=[
CmdletArg("-hash", description="File hash (SHA256) to check. If not provided, uses selected result."),
],
details=[
"- Shows whether file is active in Hydrus or marked as deleted",
"- Detects corrupted data (e.g., comma-separated URLs)",
"- Displays file metadata and service locations",
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
],
)
@register(["check-file-status", "check-status", "file-status", "status"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments
override_hash: str | None = None
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
i += 1
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("No hash provided and no result selected", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
result_data = client.fetch_file_metadata(hashes=[hash_hex])
if not result_data.get("metadata"):
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
return 1
file_info = result_data["metadata"][0]
# Status summary
is_deleted = file_info.get("is_deleted", False)
is_local = file_info.get("is_local", False)
is_trashed = file_info.get("is_trashed", False)
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
log(f"File status: {status_str}", file=sys.stderr)
# File info
log(f"\n📄 File Information:", file=sys.stderr)
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
log(f" MIME: {file_info['mime']}", file=sys.stderr)
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
# Service status
file_services = file_info.get("file_services", {})
current_services = file_services.get("current", {})
deleted_services = file_services.get("deleted", {})
if current_services:
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
for service_key, service_info in current_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
log(f" - {sname} ({stype})", file=sys.stderr)
if deleted_services:
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
for service_key, service_info in deleted_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
time_deleted = service_info.get("time_deleted", "?")
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
# URL check
urls = file_info.get("known_urls", [])
log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)
corrupted_count = 0
for i, url in enumerate(urls, 1):
if "," in url:
corrupted_count += 1
log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
else:
log(f" [{i}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
if corrupted_count > 0:
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
# Tags
tags_dict = file_info.get("tags", {})
total_tags = 0
for service_key, service_data in tags_dict.items():
service_name = service_data.get("name", "unknown")
display_tags = service_data.get("display_tags", {}).get("0", [])
total_tags += len(display_tags)
if total_tags > 0:
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
for service_key, service_data in tags_dict.items():
display_tags = service_data.get("display_tags", {}).get("0", [])
if display_tags:
service_name = service_data.get("name", "unknown")
log(f" {service_name}:", file=sys.stderr)
for tag in display_tags[:5]: # Show first 5
log(f" - {tag}", file=sys.stderr)
if len(display_tags) > 5:
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
log("\n", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error checking file status: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
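# Hedged usage sketch (hash is a placeholder):
#   @1 | check-file-status
#   check-file-status -hash <64-char sha256>
# Output goes to stderr and flags comma-separated known_urls entries as corrupted.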

110
cmdlets/cleanup.py Normal file
View File

@@ -0,0 +1,110 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
from helper.logger import log
from . import register
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp
import models
import pipeline as pipeline_context
@register(["cleanup"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Remove temporary files from pipeline results.
Accepts:
- Single result object with is_temp field
- List of result objects to clean up
Process:
- Filters results by is_temp=True
- Deletes those files from disk
- Emits only non-temporary results
Typical pipeline usage:
download-data url | screen-shot | add-tag "tag" --all | cleanup
"""
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
import json
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Normalize input to list
results = normalize_result_input(result)
if not results:
log("[cleanup] No results to process", file=sys.stderr)
return 1
# Separate temporary and permanent results
temp_results = filter_results_by_temp(results, include_temp=True)
perm_results = filter_results_by_temp(results, include_temp=False)
# Delete temporary files
deleted_count = 0
for temp_result in temp_results:
try:
file_path = get_pipe_object_path(temp_result)
if file_path:
path_obj = Path(file_path)
if path_obj.exists():
# Delete the file
path_obj.unlink()
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
deleted_count += 1
# Clean up any associated sidecar files
for ext in ['.tags', '.metadata']:
sidecar = path_obj.parent / (path_obj.name + ext)
if sidecar.exists():
try:
sidecar.unlink()
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
else:
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
# Log summary
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
# Emit permanent results for downstream processing
for perm_result in perm_results:
pipeline_context.emit(perm_result)
return 0
CMDLET = Cmdlet(
name="cleanup",
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
usage="cleanup",
args=[],
details=[
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
"- Deletes files marked with is_temp=True from disk",
"- Also cleans up associated sidecar files (.tags, .metadata)",
"- Emits only non-temporary results for further processing",
"- Typical usage at end of pipeline: ... | add-tag \"tag\" --all | cleanup",
"- Exit code 0 if cleanup successful, 1 if no results to process",
],
)

242
cmdlets/delete_file.py Normal file
View File

@@ -0,0 +1,242 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from helper.logger import log
import sqlite3
from pathlib import Path
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
def _delete_database_entry(db_path: Path, file_path: str) -> bool:
"""Delete file and related entries from local library database.
Args:
db_path: Path to the local library database file (.downlow_library.db)
file_path: Exact file path string as stored in database
Returns:
True if successful, False otherwise
"""
try:
if not db_path.exists():
log(f"Database not found at {db_path}", file=sys.stderr)
return False
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
log(f"Searching database for file_path: {file_path}", file=sys.stderr)
# Find the file_id using the exact file_path
cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,))
result = cursor.fetchone()
if not result:
log(f"ERROR: File path not found in database", file=sys.stderr)
log(f"Expected: {file_path}", file=sys.stderr)
# Debug: show sample entries
cursor.execute('SELECT id, file_path FROM files LIMIT 3')
samples = cursor.fetchall()
if samples:
log(f"Sample DB entries:", file=sys.stderr)
for fid, fpath in samples:
log(f"{fid}: {fpath}", file=sys.stderr)
conn.close()
return False
file_id = result[0]
log(f"Found file_id={file_id}, deleting all related records", file=sys.stderr)
# Delete related records
cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,))
meta_count = cursor.rowcount
cursor.execute('DELETE FROM tags WHERE file_id = ?', (file_id,))
tags_count = cursor.rowcount
cursor.execute('DELETE FROM notes WHERE file_id = ?', (file_id,))
notes_count = cursor.rowcount
cursor.execute('DELETE FROM files WHERE id = ?', (file_id,))
files_count = cursor.rowcount
conn.commit()
conn.close()
log(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr)
return True
except Exception as exc:
log(f"Database cleanup failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Parse overrides and options
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
reason_tokens: list[str] = []
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
value = str(args[i + 1]).strip().lower()
if value in {"local", "hydrus"}:
conserve = value
i += 2
continue
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
lib_root = str(args[i + 1]).strip()
i += 2
continue
reason_tokens.append(token)
i += 1
# Handle result as either dict or object
if isinstance(result, dict):
hash_hex_raw = result.get("hash_hex") or result.get("hash")
target = result.get("target")
origin = result.get("origin")
else:
hash_hex_raw = getattr(result, "hash_hex", None) or getattr(result, "hash", None)
target = getattr(result, "target", None)
origin = getattr(result, "origin", None)
# For Hydrus files, the target IS the hash
if origin and origin.lower() == "hydrus" and not hash_hex_raw:
hash_hex_raw = target
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
if conserve != "local" and local_target:
path = Path(str(target))
file_path_str = str(target) # Keep the original string for DB matching
try:
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
if ctx._PIPE_ACTIVE:
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
# Remove common sidecars regardless of file removal success
for sidecar in (Path(str(path) + ".tags"), Path(str(path) + ".metadata"), Path(str(path) + ".notes"),  # "<name>.<ext>.tags" layout used by other cmdlets
path.with_suffix(".tags"), path.with_suffix(".tags.txt"),
path.with_suffix(".metadata"), path.with_suffix(".notes")):
try:
if sidecar.exists() and sidecar.is_file():
sidecar.unlink()
except Exception:
pass
# Clean up database entry if library root provided - do this regardless of file deletion success
if lib_root:
lib_root_path = Path(lib_root)
db_path = lib_root_path / ".downlow_library.db"
log(f"Attempting DB cleanup: lib_root={lib_root}, db_path={db_path}", file=sys.stderr)
log(f"Deleting DB entry for: {file_path_str}", file=sys.stderr)
if _delete_database_entry(db_path, file_path_str):
if ctx._PIPE_ACTIVE:
ctx.emit(f"Removed database entry: {path.name}")
log(f"Database entry cleaned up", file=sys.stderr)
local_deleted = True # Mark as deleted if DB cleanup succeeded
else:
log(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr)
else:
log(f"No lib_root provided, skipping database cleanup", file=sys.stderr)
hydrus_deleted = False
if conserve != "hydrus" and hash_hex:
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
else:
if client is None:
if not local_deleted:
log("Hydrus client unavailable", file=sys.stderr)
return 1
else:
payload: Dict[str, Any] = {"hashes": [hash_hex]}
if reason:
payload["reason"] = reason
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
log(f"Deleted from Hydrus: {preview}", file=sys.stderr)
except Exception as exc:
log(f"Hydrus delete failed: {exc}", file=sys.stderr)
if not local_deleted:
return 1
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
if ctx._PIPE_ACTIVE:
if reason:
ctx.emit(f"Deleted {preview} (reason: {reason}).")
else:
ctx.emit(f"Deleted {preview}.")
if hydrus_deleted or local_deleted:
return 0
log("Selected result has neither Hydrus hash nor local file target")
return 1
CMDLET = Cmdlet(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
aliases=["del-file"],
args=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
details=[
"Default removes both the local file and Hydrus file.",
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
"Database entries are automatically cleaned up for local files.",
"Any remaining arguments are treated as the Hydrus reason text.",
],
)
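# Illustrative invocations (hypothetical selections/paths; assumes the CLI's @N result selection used elsewhere in this commit):
#   @1 | delete-file                                  -> remove the selected file locally and from Hydrus
#   @1 | delete-file -conserve local "bad encode"     -> keep the local copy, delete from Hydrus with a reason
#   delete-file -hash <sha256> -lib-root D:\library   -> target an explicit hash and clean the library DB entry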

79
cmdlets/delete_note.py Normal file
View File

@@ -0,0 +1,79 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="delete-note",
summary="Delete a named note from a Hydrus file.",
usage="i | del-note [-hash <sha256>] <name>",
aliases=["del-note"],
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("<name>", required=True, description="Name/key of the note to delete."),
],
details=[
"- Removes the note with the given name from the Hydrus file.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
if not args:
log("Requires the note name/key to delete")
return 1
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
if not rest:
log("Requires the note name/key to delete")
return 1
name = str(rest[0] or '').strip()
if not name:
log("Requires a non-empty note name/key")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.delete_notes(hash_hex, [name], service_name)
except Exception as exc:
log(f"Hydrus delete-note failed: {exc}")
return 1
log(f"Deleted note '{name}'")
return 0

219
cmdlets/delete_tag.py Normal file
View File

@@ -0,0 +1,219 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments
from helper.logger import log
CMDLET = Cmdlet(
name="delete-tags",
summary="Remove tags from a Hydrus file.",
usage="del-tags [-hash <sha256>] <tag>[,<tag>...]",
aliases=["del-tag", "del-tags", "delete-tag"],
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
],
details=[
"- Requires a Hydrus file (hash present) or explicit -hash override.",
"- Multiple tags can be comma-separated or space-separated.",
],
)
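# Illustrative invocations (tag values are hypothetical; the @{...} form is handled below after a get-tag listing):
#   @1 | del-tags "creator:someone, series:example"   -> remove two tags from the selected Hydrus file
#   get-tag                                           -> list tags into a result table first
#   delete-tag @{2,5}                                 -> then delete the tags shown at rows 2 and 5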
@register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
has_piped_tag = (result and hasattr(result, '__class__') and
result.__class__.__name__ == 'TagItem' and
hasattr(result, 'tag_name'))
# Check if we have a piped list of TagItems (from @N selection)
has_piped_tag_list = (isinstance(result, list) and result and
hasattr(result[0], '__class__') and
result[0].__class__.__name__ == 'TagItem')
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Parse -hash override and collect tags from remaining args
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
# Check if first argument is @ syntax (result table selection)
# @5 or @{2,5,8} to delete tags from ResultTable by index
tags_from_at_syntax = []
hash_from_at_syntax = None
if rest and str(rest[0]).startswith("@"):
selector_arg = str(rest[0])
pipe_selector = selector_arg[1:].strip()
# Parse @N or @{N,M,K} syntax
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
# @{2,5,8}
pipe_selector = pipe_selector[1:-1]
try:
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
except ValueError:
log("Invalid selection syntax. Use @2 or @{2,5,8}")
return 1
# Get the last ResultTable from pipeline context
try:
last_table = ctx._LAST_RESULT_TABLE
if last_table:
# Extract tags from selected rows
for idx in indices:
if 1 <= idx <= len(last_table.rows):
# Look for a TagItem in _LAST_RESULT_ITEMS by index
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
item = ctx._LAST_RESULT_ITEMS[idx - 1]
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
tag_name = getattr(item, 'tag_name', None)
if tag_name:
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
tags_from_at_syntax.append(tag_name)
# Also get hash from first item for consistency
if not hash_from_at_syntax:
hash_from_at_syntax = getattr(item, 'hash_hex', None)
if not tags_from_at_syntax:
log(f"No tags found at indices: {indices}")
return 1
else:
log("No ResultTable in pipeline (use @ after running get-tag)")
return 1
except Exception as exc:
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
# If we have a list of TagItems, we want to process ALL of them if no args provided
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
if not args and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
# We will extract tags from the list later
pass
else:
result = result[0]
# Determine tags and hash to use
tags: list[str] = []
hash_hex = None
if tags_from_at_syntax:
# Use tags extracted from @ syntax
tags = tags_from_at_syntax
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
log(f"[delete_tag] Using @ syntax extraction: {len(tags)} tag(s) to delete: {tags}")
elif isinstance(result, list) and result and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
# Got a list of TagItems (e.g. from delete-tag @1)
tags = [getattr(item, 'tag_name') for item in result if getattr(item, 'tag_name', None)]
# Use hash from first item
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result[0], "hash_hex", None))
elif result and hasattr(result, '__class__') and result.__class__.__name__ == 'TagItem':
# Got a piped TagItem - delete this specific tag
tag_name = getattr(result, 'tag_name', None)
if tag_name:
tags = [tag_name]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
else:
# Traditional mode - parse tag arguments
tags = parse_tag_arguments(rest)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not tags:
log("No valid tags were provided")
return 1
if not hash_hex:
log("Selected result does not include a hash")
return 1
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception as exc:
log(f"Failed to resolve tag service: {exc}")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
log(f"[delete_tag] Sending deletion request: hash={hash_hex}, tags={tags}, service={service_name}")
try:
result = client.delete_tags(hash_hex, tags, service_name)
log(f"[delete_tag] Hydrus response: {result}")
except Exception as exc:
log(f"Hydrus del-tag failed: {exc}")
return 1
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
log(f"Removed {len(tags)} tag(s) from {preview} via '{service_name}'.")
# Re-fetch and emit updated tags after deletion
try:
payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False)
items = payload.get("metadata") if isinstance(payload, dict) else None
if isinstance(items, list) and items:
meta = items[0] if isinstance(items[0], dict) else None
if isinstance(meta, dict):
# Extract tags from updated metadata
from cmdlets.get_tag import _extract_my_tags_from_hydrus_meta, TagItem
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
updated_tags = _extract_my_tags_from_hydrus_meta(meta, service_key, service_name)
# Emit updated tags as TagItem objects
from result_table import ResultTable
table = ResultTable("Tags", max_columns=2)
tag_items = []
for idx, tag_name in enumerate(updated_tags, start=1):
tag_item = TagItem(
tag_name=tag_name,
tag_index=idx,
hash_hex=hash_hex,
source="hydrus",
service_name=service_name,
)
tag_items.append(tag_item)
table.add_result(tag_item)
ctx.emit(tag_item)
# Store items for @ selection in next command (CLI will handle table management)
# Don't call set_last_result_table so we don't pollute history or table context
except Exception as exc:
log(f"Warning: Could not fetch updated tags after deletion: {exc}", file=__import__('sys').stderr)
return 0

82
cmdlets/delete_url.py Normal file
View File

@@ -0,0 +1,82 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="delete-url",
summary="Remove a URL association from a Hydrus file.",
usage="delete-url [-hash <sha256>] <url>",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("<url>", required=True, description="The URL to remove from the file."),
],
details=[
"- Removes the URL from the Hydrus file's known URL list.",
],
)
def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
return override_hash, rest
@register(["del-url", "delete-url", "delete_url"]) # aliases
def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
override_hash, rest = _parse_hash_and_rest(args)
if not rest:
log("Requires a URL argument")
return 1
url = str(rest[0] or '').strip()
if not url:
log("Requires a non-empty URL")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
client.delete_url(hash_hex, url)
except Exception as exc:
log(f"Hydrus del-url failed: {exc}")
return 1
log(f"Deleted URL: {url}")
return 0

2633
cmdlets/download_data.py Normal file

File diff suppressed because it is too large Load Diff

1618
cmdlets/get_file.py Normal file

File diff suppressed because it is too large Load Diff

246
cmdlets/get_metadata.py Normal file
View File

@@ -0,0 +1,246 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from helper.logger import log
from pathlib import Path
import mimetypes
import os
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
# Try to determine if this is a local file or Hydrus file
local_path = get_field(result, "target", None) or get_field(result, "path", None)
is_local = False
if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")):
is_local = True
# LOCAL FILE PATH
if is_local and local_path:
try:
file_path = Path(str(local_path))
if file_path.exists() and file_path.is_file():
# Get the hash from result or compute it
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
# If no hash, compute SHA256 of the file
if not hash_hex:
try:
import hashlib
with open(file_path, 'rb') as f:
hash_hex = hashlib.sha256(f.read()).hexdigest()
except Exception:
hash_hex = None
# Get MIME type
mime_type, _ = mimetypes.guess_type(str(file_path))
if not mime_type:
mime_type = "unknown"
# Get file size
try:
file_size = file_path.stat().st_size
except Exception:
file_size = None
# Try to get duration if it's a media file
duration_seconds = None
try:
# Try to use ffprobe if available
import subprocess
result_proc = subprocess.run(
["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)],
capture_output=True,
text=True,
timeout=5
)
if result_proc.returncode == 0 and result_proc.stdout.strip():
try:
duration_seconds = float(result_proc.stdout.strip())
except ValueError:
pass
except Exception:
pass
# Get format helpers from search module
try:
from .search_file import _format_size as _fmt_size
from .search_file import _format_duration as _fmt_dur
except Exception:
_fmt_size = lambda x: str(x) if x is not None else ""
_fmt_dur = lambda x: str(x) if x is not None else ""
size_label = _fmt_size(file_size) if file_size is not None else ""
dur_label = _fmt_dur(duration_seconds) if duration_seconds is not None else ""
# Get known URLs from sidecar or result
urls = []
sidecar_path = Path(str(file_path) + '.tags')
if sidecar_path.exists():
try:
with open(sidecar_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line.startswith('known_url:'):
url_value = line.replace('known_url:', '', 1).strip()
if url_value:
urls.append(url_value)
except Exception:
pass
# Fallback to result URLs if not in sidecar
if not urls:
urls_from_result = get_field(result, "known_urls", None) or get_field(result, "urls", None)
if isinstance(urls_from_result, list):
urls.extend([str(u).strip() for u in urls_from_result if u])
# Display local file metadata
log(f"PATH: {file_path}")
if hash_hex:
log(f"HASH: {hash_hex}")
if mime_type:
log(f"MIME: {mime_type}")
if size_label:
log(f"Size: {size_label}")
if dur_label:
log(f"Duration: {dur_label}")
if urls:
log("URLs:")
for url in urls:
log(f" {url}")
return 0
except Exception as exc:
# Fall through to Hydrus if local file handling fails
pass
# HYDRUS PATH
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
payload = client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_urls=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
log("No metadata found.")
return 0
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
log("No metadata found.")
return 0
mime = meta.get("mime")
size = meta.get("size") or meta.get("file_size")
duration_value = meta.get("duration")
inner = meta.get("metadata") if isinstance(meta.get("metadata"), dict) else None
if duration_value is None and isinstance(inner, dict):
duration_value = inner.get("duration")
try:
from .search_file import _format_size as _fmt_size
from .search_file import _format_duration as _fmt_dur
from .search_file import _hydrus_duration_seconds as _dur_secs
except Exception:
_fmt_size = lambda x: str(x) if x is not None else ""
_dur_secs = lambda x: x
_fmt_dur = lambda x: str(x) if x is not None else ""
dur_seconds = _dur_secs(duration_value)
dur_label = _fmt_dur(dur_seconds) if dur_seconds is not None else ""
size_label = _fmt_size(size)
# Display Hydrus file metadata
log(f"PATH: hydrus://file/{hash_hex}")
log(f"Hash: {hash_hex}")
if mime:
log(f"MIME: {mime}")
if dur_label:
log(f"Duration: {dur_label}")
if size_label:
log(f"Size: {size_label}")
urls = meta.get("known_urls") or meta.get("urls")
if isinstance(urls, list) and urls:
log("URLs:")
for url in urls:
try:
text = str(url).strip()
except Exception:
text = ""
if text:
log(f" {text}")
return 0
CMDLET = Cmdlet(
name="get-metadata",
summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
usage="get-metadata [-hash <sha256>]",
aliases=["meta"],
args=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
"- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
"- Automatically detects local vs Hydrus files.",
"- Local file hashes are computed via SHA256 if not already available.",
],
)
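# Illustrative usage (assumes a result selected from a prior search, or an explicit hash):
#   @1 | get-metadata
#   get-metadata -hash <sha256>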

87
cmdlets/get_note.py Normal file
View File

@@ -0,0 +1,87 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="get-note",
summary="List notes on a Hydrus file.",
usage="get-note [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Prints notes by service and note name.",
],
)
@register(["get-note", "get-notes", "get_note"]) # aliases
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
payload = client.fetch_file_metadata(hashes=[hash_hex], include_service_keys_to_tags=False, include_notes=True)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}")
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
notes = {}
if isinstance(meta, dict):
# Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
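# Both payload shapes accepted by the printing below, sketched with illustrative values
# (the exact structure depends on the Hydrus API version):
#   {"my notes": {"summary": "note text..."}}   -> nested: service name -> {note name: note text}
#   {"summary": "note text..."}                 -> flat: note name -> note text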
if notes:
ctx.emit("Notes:")
# Print flattened: service -> (name: text)
if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
for svc, mapping in notes.items():
ctx.emit(f"- {svc}:")
if isinstance(mapping, dict):
for k, v in mapping.items():
ctx.emit(f"{k}: {str(v).strip()}")
elif isinstance(notes, dict):
for k, v in notes.items():
ctx.emit(f"- {k}: {str(v).strip()}")
else:
ctx.emit("No notes found.")
return 0

240
cmdlets/get_relationship.py Normal file
View File

@@ -0,0 +1,240 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
CMDLET = Cmdlet(
name="get-relationship",
summary="Print Hydrus relationships for the selected file.",
usage="get-relationship [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Lists relationship data as returned by Hydrus.",
],
)
@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
rel = client.get_file_relationships(hash_hex)
except Exception as exc:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
return 1
if not rel:
log("No relationships found.")
return 0
# Extract file_relationships from response
file_rels = rel.get("file_relationships", {})
if not file_rels:
log("No relationships found.")
return 0
# Get the relationships dict for this specific hash
this_file_rels = file_rels.get(hash_hex)
if not this_file_rels:
log("No relationships found.")
return 0
# Extract related hashes from all relationship types
# Keys "0", "1", "3", "8" are relationship type IDs
# Values are lists of hashes
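# Illustrative shape of this_file_rels (hashes shortened; key names as consumed below):
#   {"is_king": true, "king": "abcd...", "0": ["ffee..."], "3": ["1122...", "3344..."]}
# Numeric-string keys map to lists of related hashes; the named king/is_king keys are skipped.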
related_hashes = []
for rel_type_id, hash_list in this_file_rels.items():
# Skip non-numeric keys and metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
continue
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
related_hashes.append(rel_hash)
# Remove duplicates while preserving order
seen = set()
unique_hashes = []
for h in related_hashes:
if h not in seen:
seen.add(h)
unique_hashes.append(h)
if not unique_hashes:
log("No related files found.")
return 0
# Fetch metadata for all related files
try:
metadata_payload = client.fetch_file_metadata(
hashes=unique_hashes,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
return 1
metadata_list = metadata_payload.get("metadata") if isinstance(metadata_payload, dict) else None
if not isinstance(metadata_list, list):
log("Hydrus metadata response was not a list", file=sys.stderr)
return 1
# Build metadata map by hash
meta_by_hash: Dict[str, Dict[str, Any]] = {}
for item in metadata_list:
if isinstance(item, dict):
item_hash = normalize_hash(item.get("hash"))
if item_hash:
meta_by_hash[item_hash] = item
# Helper functions for formatting
def _format_duration(seconds: Optional[float]) -> str:
if seconds is None:
return ""
try:
s = int(seconds)
hours = s // 3600
minutes = (s % 3600) // 60
secs = s % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
else:
return f"{minutes}:{secs:02d}"
except Exception:
return ""
def _get_title(meta: Dict[str, Any]) -> str:
# Try to extract title from tags
tags_payload = meta.get("tags")
if isinstance(tags_payload, dict):
for service_data in tags_payload.values():
if isinstance(service_data, dict):
storage_tags = service_data.get("storage_tags")
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
if isinstance(tag_list, list):
for tag in tag_list:
tag_str = str(tag).lower()
if tag_str.startswith("title:"):
return str(tag)[6:].strip()
# Fallback to hash prefix
h = meta.get("hash")
return str(h)[:12] if h else "unknown"
def _get_mime_type(meta: Dict[str, Any]) -> str:
mime = meta.get("mime", "")
if not mime:
return ""
# Extract type from mime (e.g., "video/mp4" -> "video")
parts = str(mime).split("/")
return parts[0] if parts else ""
# Print header and separator
log("# | Title | Type | Duration | Size")
log("--+---------------------------+-------+----------+--------")
# Create result objects for each related file
results: List[Any] = []
# Print each related file
for idx, rel_hash in enumerate(unique_hashes, start=1):
meta = meta_by_hash.get(rel_hash)
if not meta:
continue
title = _get_title(meta)
mime_type = _get_mime_type(meta)
# Get duration
duration_value = meta.get("duration")
if duration_value is None and isinstance(meta.get("metadata"), dict):
duration_value = meta["metadata"].get("duration")
duration_str = _format_duration(duration_value)
# Get size
size = meta.get("size") or meta.get("file_size")
size_str = fmt_bytes(size) if size else ""
# Format and print row
title_display = title[:25].ljust(25)
type_display = mime_type[:5].ljust(5)
duration_display = duration_str[:8].ljust(8)
size_display = size_str[:7].ljust(7)
log(f"{idx:2d} | {title_display} | {type_display} | {duration_display} | {size_display}")
# Create result object for pipeline
result_obj = type("RelatedFile", (), {
"hash_hex": rel_hash,
"title": title,
"media_kind": mime_type or "other",
"size": size,
"duration": duration_value,
"known_urls": [],
"annotations": [],
"columns": [
("Title", title),
("Type", mime_type),
("Duration", duration_str),
("Size", size_str),
],
})()
results.append(result_obj)
# Emit results to pipeline
try:
ctx._PIPE_EMITS.extend(results)
except Exception:
pass
return 0

1191
cmdlets/get_tag.py Normal file

File diff suppressed because it is too large Load Diff

90
cmdlets/get_url.py Normal file
View File

@@ -0,0 +1,90 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="get-url",
summary="List URLs associated with a Hydrus file.",
usage="get-url [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Prints the known URLs for the selected Hydrus file.",
],
)
def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
return override_hash, rest
@register(["get-url", "get-urls", "get_url"]) # aliases
def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
override_hash, _ = _parse_hash_and_rest(args)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}")
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or []
if urls:
ctx.emit("URLs:")
for u in urls:
text = str(u).strip()
if text:
ctx.emit(f"- {text}")
else:
ctx.emit("No URLs found.")
return 0

138
cmdlets/manage_config.py Normal file
View File

@@ -0,0 +1,138 @@
from typing import List, Dict, Any
from ._shared import Cmdlet, CmdletArg
from config import load_config, save_config
CMDLET = Cmdlet(
name=".config",
summary="Manage configuration settings",
usage=".config [key] [value]",
args=[
CmdletArg(
name="key",
description="Configuration key to update (dot-separated)",
required=False
),
CmdletArg(
name="value",
description="New value for the configuration key",
required=False
)
]
)
def flatten_config(config: Dict[str, Any], parent_key: str = '', sep: str = '.') -> List[Dict[str, Any]]:
items = []
for k, v in config.items():
if k.startswith('_'): # Skip internal keys
continue
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.extend(flatten_config(v, new_key, sep=sep))
else:
items.append({
"Key": new_key,
"Value": str(v),
"Type": type(v).__name__,
"_selection_args": [new_key]
})
return items
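# Example with an illustrative config: flatten_config({"hydrus": {"api_key": "abc", "timeout": 30}}) returns
#   [{"Key": "hydrus.api_key", "Value": "abc", "Type": "str", "_selection_args": ["hydrus.api_key"]},
#    {"Key": "hydrus.timeout", "Value": "30", "Type": "int", "_selection_args": ["hydrus.timeout"]}]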
def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
keys = key.split('.')
d = config
# Navigate to the parent dict
for k in keys[:-1]:
if k not in d or not isinstance(d[k], dict):
d[k] = {}
d = d[k]
last_key = keys[-1]
# Try to preserve type if key exists
if last_key in d:
current_val = d[last_key]
if isinstance(current_val, bool):
if value.lower() in ('true', 'yes', '1', 'on'):
d[last_key] = True
elif value.lower() in ('false', 'no', '0', 'off'):
d[last_key] = False
else:
# Fallback to boolean conversion of string (usually True for non-empty)
# But for config, explicit is better.
print(f"Warning: Could not convert '{value}' to boolean. Using string.")
d[last_key] = value
elif isinstance(current_val, int):
try:
d[last_key] = int(value)
except ValueError:
print(f"Warning: Could not convert '{value}' to int. Using string.")
d[last_key] = value
elif isinstance(current_val, float):
try:
d[last_key] = float(value)
except ValueError:
print(f"Warning: Could not convert '{value}' to float. Using string.")
d[last_key] = value
else:
d[last_key] = value
else:
# New key, try to infer type
if value.lower() in ('true', 'false'):
d[last_key] = (value.lower() == 'true')
elif value.isdigit():
d[last_key] = int(value)
else:
d[last_key] = value
return True
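# Example calls (keys/values are illustrative; real keys live in the user's config.json):
#   set_nested_config(cfg, "output.directory", "D:/downloads")   # creates {"output": {"directory": ...}} if missing
#   set_nested_config(cfg, "debug", "true")                      # new key with 'true'/'false' is stored as a bool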
def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
# Reload config to ensure we have the latest on disk
# We don't use the passed 'config' because we want to edit the file
# and 'config' might contain runtime objects (like worker manager)
# But load_config() returns a fresh dict from disk (or cache)
# We should use load_config()
current_config = load_config()
# Parse args
# We handle args manually because of the potential for spaces in values
# and the @ expansion logic in CLI.py passing args
if not args:
# List mode
items = flatten_config(current_config)
# Sort by key
items.sort(key=lambda x: x['Key'])
# Emit items for ResultTable
import pipeline as ctx
for item in items:
ctx.emit(item)
return 0
# Update mode
key = args[0]
if len(args) < 2:
print(f"Error: Value required for key '{key}'")
return 1
value = " ".join(args[1:])
# Remove quotes if present
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
value = value[1:-1]
try:
set_nested_config(current_config, key, value)
save_config(current_config)
print(f"Updated '{key}' to '{value}'")
return 0
except Exception as e:
print(f"Error updating config: {e}")
return 1
CMDLET.exec = _run

916
cmdlets/merge_file.py Normal file
View File

@@ -0,0 +1,916 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import json
import sys
from helper.logger import log
from helper.download import download_media
from models import DownloadOptions
from config import resolve_output_dir
import subprocess as _subprocess
import shutil as _shutil
from ._shared import parse_cmdlet_args
try:
from PyPDF2 import PdfWriter, PdfReader
HAS_PYPDF2 = True
except ImportError:
HAS_PYPDF2 = False
PdfWriter = None
PdfReader = None
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
merge_multiple_tag_lists,
write_tags,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
from . import register
from ._shared import (
Cmdlet,
CmdletArg,
normalize_result_input,
get_pipe_object_path,
get_pipe_object_hash,
)
import models
import pipeline as ctx
def _get_item_value(item: Any, key: str, default: Any = None) -> Any:
"""Helper to read either dict keys or attributes."""
if isinstance(item, dict):
return item.get(key, default)
return getattr(item, key, default)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Merge multiple files into one."""
# Parse help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
delete_after = parsed.get("delete", False)
output_override: Optional[Path] = None
output_arg = parsed.get("output")
if output_arg:
try:
output_override = Path(str(output_arg)).expanduser()
except Exception:
output_override = None
format_spec = parsed.get("format")
if format_spec:
format_spec = str(format_spec).lower().strip()
# Collect files from piped results
# Use normalize_result_input to handle both single items and lists
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
if not files_to_merge:
log("No files provided to merge", file=sys.stderr)
return 1
if len(files_to_merge) < 2:
# Only 1 file - pass it through unchanged
# (merge only happens when multiple files are collected)
item = files_to_merge[0]
ctx.emit(item)
return 0
# Extract file paths and metadata from result objects
source_files: List[Path] = []
source_tags_files: List[Path] = []
source_hashes: List[str] = []
source_urls: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_relationships: List[str] = [] # NEW: collect relationships from source files
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
if isinstance(raw_path, Path):
target_path = raw_path
elif isinstance(raw_path, str) and raw_path.strip():
candidate = Path(raw_path).expanduser()
if candidate.exists():
target_path = candidate
# Check for playlist item that needs downloading
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
try:
playlist_url = item.get('__file_path')
item_idx = int(item['__action'].split(':')[1])
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
output_dir = resolve_output_dir(config)
opts = DownloadOptions(
url=playlist_url,
output_dir=output_dir,
playlist_items=str(item_idx),
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
)
res = download_media(opts)
if res and res.path and res.path.exists():
target_path = res.path
log(f"✓ Downloaded: {target_path.name}", flush=True)
except Exception as e:
log(f"Failed to download playlist item: {e}", file=sys.stderr)
if target_path and target_path.exists():
source_files.append(target_path)
# Track the .tags file for this source
tags_file = target_path.with_suffix(target_path.suffix + '.tags')
if tags_file.exists():
source_tags_files.append(tags_file)
# Try to read hash, tags, urls, and relationships from .tags sidecar file
try:
tags_content = tags_file.read_text(encoding='utf-8')
for line in tags_content.split('\n'):
line = line.strip()
if not line:
continue
if line.startswith('hash:'):
hash_value = line[5:].strip()
if hash_value:
source_hashes.append(hash_value)
elif line.startswith('known_url:') or line.startswith('url:'):
# Extract URLs from tags file
url_value = line.split(':', 1)[1].strip() if ':' in line else ''
if url_value and url_value not in source_urls:
source_urls.append(url_value)
elif line.startswith('relationship:'):
# Extract relationships from tags file
rel_value = line.split(':', 1)[1].strip() if ':' in line else ''
if rel_value and rel_value not in source_relationships:
source_relationships.append(rel_value)
else:
# Collect actual tags (not metadata like hash: or known_url:)
source_tags.append(line)
except Exception:
pass
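# Sketch of a .tags sidecar as this parser expects it (values illustrative):
#   hash:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
#   title:Example Episode 01
#   creator:someone
#   known_url:https://example.com/watch?v=abc123
#   relationship:<free-form relationship string>
# Lines prefixed hash:/known_url:/url:/relationship: are routed to their own lists; every other line is kept as a tag.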
# Extract hash if available in item (as fallback)
hash_value = get_pipe_object_hash(item)
if hash_value and hash_value not in source_hashes:
source_hashes.append(str(hash_value))
# Extract known URLs if available
known_urls = _get_item_value(item, 'known_urls', [])
if isinstance(known_urls, str):
source_urls.append(known_urls)
elif isinstance(known_urls, list):
source_urls.extend(known_urls)
else:
title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown')
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
if len(source_files) < 2:
log("At least 2 valid files required to merge", file=sys.stderr)
return 1
# Detect file types
file_types = set()
for f in source_files:
suffix = f.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_types.add('audio')
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_types.add('video')
elif suffix in {'.pdf'}:
file_types.add('pdf')
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
file_types.add('text')
else:
file_types.add('other')
if len(file_types) > 1 and 'other' not in file_types:
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
log(f"Can only merge files of the same type", file=sys.stderr)
return 1
file_kind = list(file_types)[0] if file_types else 'other'
# Determine output format
output_format = format_spec or 'auto'
if output_format == 'auto':
if file_kind == 'audio':
output_format = 'mka' # Default audio codec - mka supports chapters and stream copy
elif file_kind == 'video':
output_format = 'mp4' # Default video codec
elif file_kind == 'pdf':
output_format = 'pdf'
else:
output_format = 'txt'
# Determine output path
if output_override:
if output_override.is_dir():
base_name = _sanitize_name(_get_item_value(files_to_merge[0], 'title', 'merged') or 'merged')
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
else:
output_path = output_override
else:
first_file = source_files[0]
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
# Perform merge based on file type
if file_kind == 'audio':
success = _merge_audio(source_files, output_path, output_format)
elif file_kind == 'video':
success = _merge_video(source_files, output_path, output_format)
elif file_kind == 'pdf':
success = _merge_pdf(source_files, output_path)
elif file_kind == 'text':
success = _merge_text(source_files, output_path)
else:
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
return 1
if not success:
log("Merge failed", file=sys.stderr)
return 1
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
# Create .tags sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tags')
try:
# Start with title tag
merged_tags = [f"title:{output_path.stem}"]
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
# Fallback: simple deduplication if metadata API unavailable
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Write merged tags to sidecar file
if HAS_METADATA_API and write_tags_to_file:
# Use unified API for file writing
source_hashes_list = source_hashes if source_hashes else None
source_urls_list = source_urls if source_urls else None
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list)
else:
# Fallback: manual file writing
tags_lines = []
# Add hash first (if available)
if source_hashes:
tags_lines.append(f"hash:{source_hashes[0]}")
# Add regular tags
tags_lines.extend(merged_tags)
# Add known URLs
if source_urls:
for url in source_urls:
tags_lines.append(f"known_url:{url}")
# Add relationships (if available)
if source_relationships:
for rel in source_relationships:
tags_lines.append(f"relationship:{rel}")
with open(tags_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(tags_lines) + '\n')
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
# Also create .metadata file using the centralized function (only when the metadata API imported above is available)
if HAS_METADATA_API:
try:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
# Emit PipelineItem so the merged file can be piped to next command
try:
# Try to import PipelineItem from downlow module
try:
from downlow import PipelineItem
except ImportError:
# Fallback: create a simple object with the required attributes
class SimpleItem:
def __init__(self, target, title, media_kind, tags=None, known_urls=None):
self.target = target
self.title = title
self.media_kind = media_kind
self.tags = tags or []
self.known_urls = known_urls or []
PipelineItem = SimpleItem
merged_item = PipelineItem(
target=str(output_path),
title=output_path.stem,
media_kind=file_kind,
tags=merged_tags, # Include merged tags
known_urls=source_urls # Include known URLs
)
ctx.emit(merged_item)
except Exception as e:
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
# Still emit a string representation for feedback
ctx.emit(f"Merged: {output_path}")
# Delete source files if requested
if delete_after:
# First delete all .tags files
for tags_file in source_tags_files:
try:
tags_file.unlink()
log(f"Deleted: {tags_file.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
# Then delete all source files
for f in source_files:
try:
f.unlink()
log(f"Deleted: {f.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
return 0
def _sanitize_name(text: str) -> str:
"""Sanitize filename."""
allowed = []
for ch in text:
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
return (" ".join("".join(allowed).split()) or "merged").strip()
def _ext_for_format(fmt: str) -> str:
"""Get file extension for format."""
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
'mkv': 'mkv',
'mp4': 'mp4',
'webm': 'webm',
'pdf': 'pdf',
'txt': 'txt',
'auto': 'mka', # Default - MKA for chapters
}
return format_map.get(fmt.lower(), 'mka')
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
"""Add chapters to an M4A file using mutagen.
Args:
file_path: Path to M4A file
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
Returns:
True if successful, False otherwise
"""
import logging
logger = logging.getLogger(__name__)
if not chapters:
return True
try:
from mutagen.mp4 import MP4  # only MP4 is used below; the unused low-level Atom imports were removed
except ImportError:
logger.warning("[merge-file] mutagen not available for chapter writing")
return False
try:
# Load the MP4 file
audio = MP4(str(file_path))
# Build the chapter atom
# MP4 chapters are stored in a 'chap' atom with specific structure
chapter_data = b''
for i, chapter in enumerate(chapters, 1):
# Each chapter entry: 10-byte header + title
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
start_time_ms = int(chapter.get('start_ms', 0))
# Chapter atom format for M4A:
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
# This is complex, so we'll use a simpler atom approach
pass
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
# Chapter writing requires low-level atom manipulation
# For now, we'll just return and note this limitation
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
return False
except Exception as e:
logger.warning(f"[merge-file] Error writing chapters: {e}")
return False
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge audio files with chapters based on file boundaries."""
import logging
logger = logging.getLogger(__name__)
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Step 1: Get duration of each file to calculate chapter timestamps
chapters = []
current_time_ms = 0
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
logger.info(f"[merge-file] Analyzing files for chapters")
for file_path in files:
# Get duration using ffprobe
try:
ffprobe_cmd = [
'ffprobe', '-v', 'error', '-show_entries',
'format=duration', '-print_format',
'default=noprint_wrappers=1:nokey=1', str(file_path)
]
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
if probe_result.returncode == 0 and probe_result.stdout.strip():
try:
duration_sec = float(probe_result.stdout.strip())
except ValueError:
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
duration_sec = 0
else:
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
duration_sec = 0
except Exception as e:
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
duration_sec = 0
# Create chapter entry - use title: tag from metadata if available
title = file_path.stem # Default to filename without extension
if HAS_METADATA_API:
try:
# Try to read tags from .tags sidecar file
tags_file = file_path.with_suffix(file_path.suffix + '.tags')
if tags_file.exists():
tags = read_tags_from_file(tags_file)
if tags:
# Look for title: tag
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith('title:'):
# Extract the title value after the colon
title = tag.split(':', 1)[1].strip()
break
except Exception as e:
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
pass # Fall back to filename
# Convert seconds to HH:MM:SS.mmm format
hours = int(current_time_ms // 3600000)
minutes = int((current_time_ms % 3600000) // 60000)
seconds = int((current_time_ms % 60000) // 1000)
millis = int(current_time_ms % 1000)
chapters.append({
'time_ms': current_time_ms,
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
'title': title,
'duration_sec': duration_sec
})
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
current_time_ms += int(duration_sec * 1000)
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
# Escape quotes in path
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
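# The concat list uses ffmpeg's concat-demuxer syntax, one entry per source (paths illustrative):
#   file '/music/part 01.mp3'
#   file '/music/part 02.mp3'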
# Step 3: Create FFmpeg metadata file with chapters
metadata_file = output.parent / f".metadata_{output.stem}.txt"
metadata_lines = [';FFMETADATA1']
for i, chapter in enumerate(chapters):
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
metadata_lines.append('[CHAPTER]')
metadata_lines.append('TIMEBASE=1/1000')
metadata_lines.append(f'START={chapter["time_ms"]}')
# Calculate end time (start of next chapter or end of file)
if i < len(chapters) - 1:
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
else:
metadata_lines.append(f'END={current_time_ms}')
metadata_lines.append(f'title={chapter["title"]}')
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
logger.info(f"[merge-file] Created {len(chapters)} chapters")
# Step 4: Build FFmpeg command to merge and embed chapters
# Strategy: First merge audio, then add metadata in separate pass
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Add threading options for speed
cmd.extend(['-threads', '0']) # Use all available threads
# Audio codec selection for first input
if output_format == 'mp3':
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
elif output_format == 'm4a':
# Use copy if possible (much faster), otherwise re-encode
# Check if inputs are already AAC/M4A to avoid re-encoding
# For now, default to copy if format matches, otherwise re-encode
# But since we are merging potentially different codecs, re-encoding is safer
# To speed up re-encoding, we can use a faster preset or hardware accel if available
cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
elif output_format == 'aac':
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'opus':
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
elif output_format == 'mka':
# FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
# If we want speed, copy is best. If we want compatibility, re-encode.
# Let's try copy first if inputs are same format, but that's hard to detect here.
# Defaulting to copy for MKA as it's a container that supports many codecs
cmd.extend(['-c:a', 'copy'])
else:
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
# Add the output file
cmd.append(str(output))
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
# Run ffmpeg with progress monitoring
try:
from helper.progress import print_progress, print_final_progress
import re
process = _subprocess.Popen(
cmd,
stdout=_subprocess.PIPE,
stderr=_subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace'
)
# Monitor progress
duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
total_duration_sec = current_time_ms / 1000.0
while True:
# Read stderr line by line (ffmpeg writes progress to stderr)
if process.stderr:
line = process.stderr.readline()
if not line and process.poll() is not None:
break
if line:
# Parse time=HH:MM:SS.mm
match = duration_re.search(line)
if match and total_duration_sec > 0:
h, m, s, cs = map(int, match.groups())
current_sec = h * 3600 + m * 60 + s + cs / 100.0
# Calculate speed/bitrate if available (optional)
# For now just show percentage
print_progress(
output.name,
int(current_sec * 1000), # Use ms as "bytes" for progress bar
int(total_duration_sec * 1000),
speed=0
)
else:
break
# Wait for completion
stdout, stderr = process.communicate()
if process.returncode != 0:
log(f"FFmpeg error: {stderr}", file=sys.stderr)
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
except Exception as e:
logger.exception(f"[merge-file] ffmpeg process error: {e}")
raise
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
if output_format == 'mka' or output.suffix.lower() == '.mka':
# MKA/MKV format has native chapter support via FFMetadata
# Re-mux the file with chapters embedded (copy streams, no re-encode)
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
temp_output = output.parent / f".temp_{output.stem}.mka"
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
mkvmerge_path = _shutil.which('mkvmerge')
if mkvmerge_path:
# mkvmerge is the best tool for embedding chapters in Matroska files
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
cmd2 = [
mkvmerge_path, '-o', str(temp_output),
'--chapters', str(metadata_file),
str(output)
]
else:
# Fallback to ffmpeg with proper chapter embedding for Matroska
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
# For Matroska files, the metadata must be provided via -f ffmetadata input
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
try:
# Run chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
logger.info(f"[merge-file] Chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] Chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
# MP4/M4A format has native chapter support via iTunes metadata atoms
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
try:
# Run MP4 chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
logger.info(f"[merge-file] MP4 chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
else:
# For other formats, chapters would require external tools
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
# Clean up temp files
try:
concat_file.unlink()
except Exception:
pass
try:
metadata_file.unlink()
except Exception:
pass
return True
except Exception as e:
log(f"Audio merge error: {e}", file=sys.stderr)
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
return False
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge video files."""
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
# Build FFmpeg command for video merge
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Video codec selection
if output_format == 'mp4':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'mkv':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
else:
cmd.extend(['-c', 'copy']) # Copy without re-encoding
cmd.append(str(output))
log(f"Merging {len(files)} video files...", file=sys.stderr)
result = _subprocess.run(cmd, capture_output=True, text=True)
# Clean up concat file
try:
concat_file.unlink()
except Exception:
pass
if result.returncode != 0:
stderr = (result.stderr or '').strip()
log(f"FFmpeg error: {stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Video merge error: {e}", file=sys.stderr)
return False
def _merge_text(files: List[Path], output: Path) -> bool:
"""Merge text files."""
try:
with open(output, 'w', encoding='utf-8') as outf:
for i, f in enumerate(files):
if i > 0:
outf.write('\n---\n') # Separator between files
try:
content = f.read_text(encoding='utf-8', errors='replace')
outf.write(content)
except Exception as e:
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
return True
except Exception as e:
log(f"Text merge error: {e}", file=sys.stderr)
return False
def _merge_pdf(files: List[Path], output: Path) -> bool:
"""Merge PDF files."""
if not HAS_PYPDF2:
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
return False
    try:
        writer = PdfWriter()
for f in files:
try:
reader = PdfReader(f)
for page in reader.pages:
writer.add_page(page)
log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
except Exception as e:
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
return False
with open(output, 'wb') as outf:
writer.write(outf)
return True
except Exception as e:
log(f"PDF merge error: {e}", file=sys.stderr)
return False
CMDLET = Cmdlet(
name="merge-file",
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|aac|opus|mp4|mkv|pdf|txt>]",
args=[
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
CmdletArg("-output", description="Override output file path."),
CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
],
details=[
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
"- Audio files merge with minimal quality loss using specified codec.",
"- Video files merge into MP4 or MKV containers.",
"- PDF files merge into a single PDF document.",
"- Text/document files are concatenated.",
"- Output name derived from first file with ' (merged)' suffix.",
"- -delete flag removes all source files after successful merge.",
],
)

335
cmdlets/pipe.py Normal file
View File

@@ -0,0 +1,335 @@
from typing import Any, Dict, Sequence, List, Optional
import sys
import json
import platform
import socket
import re
import subprocess
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
from .get_file import _get_fixed_ipc_pipe
import pipeline as ctx
def _send_ipc_command(command: Dict[str, Any]) -> Optional[Any]:
"""Send a command to the MPV IPC pipe and return the response."""
ipc_pipe = _get_fixed_ipc_pipe()
request = json.dumps(command) + "\n"
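    # MPV's JSON IPC exchanges one JSON object per line; an illustrative exchange:
    #   request:  {"command": ["get_property", "playlist"], "request_id": 100}
    #   response: {"data": [...], "request_id": 100, "error": "success"}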
try:
if platform.system() == 'Windows':
# Windows named pipe
# Opening in r+b mode to read response
try:
with open(ipc_pipe, 'r+b', buffering=0) as pipe:
pipe.write(request.encode('utf-8'))
pipe.flush()
# Read response
# We'll try to read a line. This might block if MPV is unresponsive.
response_line = pipe.readline()
if response_line:
return json.loads(response_line.decode('utf-8'))
except FileNotFoundError:
return None # MPV not running
except Exception as e:
log(f"Windows IPC Error: {e}", file=sys.stderr)
return None
else:
# Unix socket
af_unix = getattr(socket, 'AF_UNIX', None)
if af_unix is None:
log("Unix sockets not supported on this platform", file=sys.stderr)
return None
try:
sock = socket.socket(af_unix, socket.SOCK_STREAM)
sock.settimeout(2.0)
sock.connect(ipc_pipe)
sock.sendall(request.encode('utf-8'))
# Read response
response_data = b""
while True:
try:
chunk = sock.recv(4096)
if not chunk:
break
response_data += chunk
if b"\n" in chunk:
break
except socket.timeout:
break
sock.close()
if response_data:
# Parse lines, look for response to our request
lines = response_data.decode('utf-8').strip().split('\n')
for line in lines:
try:
resp = json.loads(line)
# If it has 'error' field, it's a response
if 'error' in resp:
return resp
                        except Exception:
pass
except (FileNotFoundError, ConnectionRefusedError):
return None # MPV not running
except Exception as e:
log(f"Unix IPC Error: {e}", file=sys.stderr)
return None
except Exception as e:
log(f"IPC Error: {e}", file=sys.stderr)
return None
return None
def _get_playlist() -> List[Dict[str, Any]]:
"""Get the current playlist from MPV."""
cmd = {"command": ["get_property", "playlist"], "request_id": 100}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
return resp.get("data", [])
return []
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Manage and play items in the MPV playlist via IPC."""
parsed = parse_cmdlet_args(args, CMDLET)
# Handle positional index argument if provided
index_arg = parsed.get("index")
clear_mode = parsed.get("clear")
list_mode = parsed.get("list")
# Handle piped input (add to playlist)
if result:
# If result is a list of items, add them to playlist
items_to_add = []
if isinstance(result, list):
items_to_add = result
elif isinstance(result, dict):
items_to_add = [result]
added_count = 0
for i, item in enumerate(items_to_add):
# Extract URL/Path
target = None
title = None
if isinstance(item, dict):
target = item.get("target") or item.get("url") or item.get("path")
title = item.get("title") or item.get("name")
elif hasattr(item, "target"):
target = item.target
title = getattr(item, "title", None)
elif isinstance(item, str):
target = item
if target:
# Add to MPV playlist
# We use loadfile with append flag
# Configure 1080p limit for streams (bestvideo<=1080p + bestaudio)
options = {
"ytdl-format": "bestvideo[height<=?1080]+bestaudio/best[height<=?1080]"
}
if title:
options["force-media-title"] = title
cmd = {"command": ["loadfile", target, "append", options], "request_id": 200}
resp = _send_ipc_command(cmd)
if resp is None:
# MPV not running (or died)
# Start MPV with remaining items
_start_mpv(items_to_add[i:])
return 0
elif resp.get("error") == "success":
added_count += 1
if title:
log(f"Queued: {title}")
else:
log(f"Queued: {target}")
        if added_count > 0:
            # Items were queued; fall through and list the updated playlist below.
            pass
# Get playlist from MPV
items = _get_playlist()
if not items:
log("MPV playlist is empty or MPV is not running.")
return 0
# If index is provided, perform action (Play or Clear)
if index_arg is not None:
try:
# Handle 1-based index
idx = int(index_arg) - 1
if idx < 0 or idx >= len(items):
log(f"Index {index_arg} out of range (1-{len(items)}).")
return 1
item = items[idx]
title = item.get("title") or item.get("filename") or "Unknown"
if clear_mode:
# Remove item
cmd = {"command": ["playlist-remove", idx], "request_id": 101}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
log(f"Removed: {title}")
# Refresh items for listing
items = _get_playlist()
list_mode = True
index_arg = None
else:
log(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
return 1
else:
# Play item
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
log(f"Playing: {title}")
return 0
else:
log(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
return 1
except ValueError:
log(f"Invalid index: {index_arg}")
return 1
# List items (Default action or after clear)
if list_mode or index_arg is None:
if not items:
log("MPV playlist is empty.")
return 0
table = ResultTable("MPV Playlist")
for i, item in enumerate(items):
is_current = item.get("current", False)
title = item.get("title") or ""
filename = item.get("filename") or ""
# Special handling for memory:// M3U playlists (used to pass titles via IPC)
if "memory://" in filename and "#EXTINF:" in filename:
try:
# Extract title from #EXTINF:-1,Title
# Use regex to find title between #EXTINF:-1, and newline
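                    # An illustrative raw value (newlines shown literally):
                    #   "memory://#EXTM3U\n#EXTINF:-1,My Video Title\nhttps://example.com/stream"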
match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename)
if match:
extracted_title = match.group(1).strip()
if not title or title == "memory://":
title = extracted_title
# Extract actual URL
# Find the first line that looks like a URL and not a directive
lines = filename.splitlines()
for line in lines:
line = line.strip()
if line and not line.startswith('#') and not line.startswith('memory://'):
filename = line
break
except Exception:
pass
# Truncate if too long
if len(title) > 57:
title = title[:57] + "..."
if len(filename) > 27:
filename = filename[:27] + "..."
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Current", "*" if is_current else "")
row.add_column("Title", title)
row.add_column("Filename", filename)
table.set_row_selection_args(i, [str(i + 1)])
table.set_source_command(".pipe")
# Register results with pipeline context so @N selection works
ctx.set_last_result_table_overlay(table, items)
ctx.set_current_stage_table(table)
print(table)
return 0
def _start_mpv(items: List[Any]) -> None:
"""Start MPV with a list of items."""
ipc_pipe = _get_fixed_ipc_pipe()
cmd = ['mpv', f'--input-ipc-server={ipc_pipe}']
cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]')
# Add items
first_title_set = False
for item in items:
target = None
title = None
if isinstance(item, dict):
target = item.get("target") or item.get("url") or item.get("path")
title = item.get("title") or item.get("name")
elif hasattr(item, "target"):
target = item.target
title = getattr(item, "title", None)
elif isinstance(item, str):
target = item
if target:
if not first_title_set and title:
cmd.append(f'--force-media-title={title}')
first_title_set = True
cmd.append(target)
if len(cmd) > 3: # mpv + ipc + format + at least one file
try:
kwargs = {}
if platform.system() == 'Windows':
kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
log(f"Started MPV with {len(cmd)-3} items")
except Exception as e:
log(f"Error starting MPV: {e}", file=sys.stderr)
CMDLET = Cmdlet(
name=".pipe",
aliases=["pipe", "playlist", "queue", "ls-pipe"],
summary="Manage and play items in the MPV playlist via IPC",
usage=".pipe [index] [-clear]",
args=[
CmdletArg(
name="index",
type="int",
description="Index of item to play or clear",
required=False
),
CmdletArg(
name="clear",
type="flag",
description="Remove the selected item from the playlist"
),
CmdletArg(
name="list",
type="flag",
description="List items (default)"
),
],
exec=_run
)

739
cmdlets/screen_shot.py Normal file
View File

@@ -0,0 +1,739 @@
"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline.
This cmdlet processes files through the pipeline and creates screenshots using
Playwright, marking them as temporary artifacts for cleanup.
"""
from __future__ import annotations
import contextlib
import hashlib
import importlib
import json
import sys
import time
import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlsplit, quote, urljoin
from helper.logger import log
from helper.http_client import HTTPClient
from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input
import models
import pipeline as pipeline_context
# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
ViewportSize,
sync_playwright,
)
except Exception as exc:
raise RuntimeError(
"playwright is required for screenshot capture; install with 'pip install playwright'"
) from exc
try:
from config import resolve_output_dir
except ImportError:
try:
_parent_dir = str(Path(__file__).parent.parent)
if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
from config import resolve_output_dir
except ImportError:
resolve_output_dir = None
# ============================================================================
# Screenshot Constants & Configuration
# ============================================================================
USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0.0.0 Safari/537.36"
)
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
ARCHIVE_TIMEOUT = 30.0
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@dataclass(slots=True)
class ScreenshotOptions:
"""Options controlling screenshot capture and post-processing."""
url: str
output_dir: Path
output_path: Optional[Path] = None
full_page: bool = True
headless: bool = True
wait_after_load: float = 2.0
wait_for_article: bool = False
replace_video_posters: bool = True
tags: Sequence[str] = ()
archive: bool = False
archive_timeout: float = ARCHIVE_TIMEOUT
known_urls: Sequence[str] = ()
output_format: Optional[str] = None
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
selector_timeout_ms: int = 10_000
@dataclass(slots=True)
class ScreenshotResult:
"""Details about the captured screenshot."""
path: Path
url: str
tags_applied: List[str]
archive_urls: List[str]
known_urls: List[str]
warnings: List[str] = field(default_factory=list)
# ============================================================================
# Helper Functions
# ============================================================================
def _ensure_directory(path: Path) -> None:
"""Ensure directory exists."""
if not isinstance(path, Path):
path = Path(path)
path.mkdir(parents=True, exist_ok=True)
def _unique_path(path: Path) -> Path:
"""Get unique path by appending numbers if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem}_{counter}{suffix}"
if not new_path.exists():
return new_path
counter += 1
def _unique_preserve_order(items: Sequence[str]) -> List[str]:
"""Remove duplicates while preserving order."""
seen = set()
result = []
for item in items:
if item not in seen:
seen.add(item)
result.append(item)
return result
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
candidate = f"{parsed.netloc}{parsed.path}"
if parsed.query:
candidate += f"?{parsed.query}"
slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
slug = slug.strip("-") or "screenshot"
return slug[:100]
def _normalise_format(fmt: Optional[str]) -> str:
"""Normalize output format to valid values."""
if not fmt:
return "png"
value = fmt.strip().lower()
if value in {"jpg", "jpeg"}:
return "jpeg"
if value in {"png", "pdf"}:
return value
return "png"
def _format_suffix(fmt: str) -> str:
"""Get file suffix for format."""
if fmt == "jpeg":
return ".jpg"
return f".{fmt}"
def _selectors_for_url(url: str) -> List[str]:
"""Return a list of likely content selectors for known platforms."""
u = url.lower()
sels: List[str] = []
# Twitter/X
if "twitter.com" in u or "x.com" in u:
sels.extend([
"article[role='article']",
"div[data-testid='tweet']",
"div[data-testid='cellInnerDiv'] article",
])
# Instagram
if "instagram.com" in u:
sels.extend([
"article[role='presentation']",
"article[role='article']",
"div[role='dialog'] article",
"section main article",
])
# Reddit
if "reddit.com" in u:
sels.extend([
"shreddit-post",
"div[data-testid='post-container']",
"div[data-click-id='background']",
"article",
])
# Rumble (video post)
if "rumble.com" in u:
sels.extend([
"rumble-player, iframe.rumble",
"div.video-item--main",
"main article",
])
return sels or ["article"]
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
u = url.lower()
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
clicks = 0
for _ in range(max(1, passes)):
for t in texts:
try:
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
clicks += 1
except PlaywrightTimeoutError:
pass
except Exception:
pass
time.sleep(0.1)
return clicks
# Dismiss common cookie/consent prompts
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
# Platform-specific expansions
if "reddit.com" in u:
_try_click_texts(["see more", "read more", "show more", "more"])
if ("twitter.com" in u) or ("x.com" in u):
_try_click_texts(["show more", "more"])
if "instagram.com" in u:
_try_click_texts(["more", "see more"])
if "tiktok.com" in u:
_try_click_texts(["more", "see more"])
if ("facebook.com" in u) or ("fb.watch" in u):
_try_click_texts(["see more", "show more", "more"])
if "rumble.com" in u:
_try_click_texts(["accept", "agree", "close"])
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Internet Archive Wayback Machine."""
encoded = quote(url, safe="/:?=&")
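    # e.g. GET https://web.archive.org/save/https://example.com/article (URL illustrative)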
with HTTPClient() as client:
response = client.get(f"https://web.archive.org/save/{encoded}")
response.raise_for_status()
content_location = response.headers.get("Content-Location")
if content_location:
return urljoin("https://web.archive.org", content_location)
return str(response.url)
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.today."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.today/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and ("archive.today" in final or "archive.ph" in final):
return final
return None
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.ph."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.ph/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and "archive.ph" in final:
return final
return None
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
"""Submit URL to all available archive services."""
archives: List[str] = []
warnings: List[str] = []
for submitter, label in (
(_submit_wayback, "wayback"),
(_submit_archive_today, "archive.today"),
(_submit_archive_ph, "archive.ph"),
):
try:
log(f"Archiving to {label}...", flush=True)
archived = submitter(url, timeout)
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 429:
warnings.append(f"archive {label} rate limited (HTTP 429)")
log(f"{label}: Rate limited (HTTP 429)", flush=True)
else:
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
except httpx.RequestError as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: Connection error: {exc}", flush=True)
except Exception as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: {exc}", flush=True)
else:
if archived:
archives.append(archived)
log(f"{label}: Success - {archived}", flush=True)
else:
log(f"{label}: No archive link returned", flush=True)
return archives, warnings
def _prepare_output_path(options: ScreenshotOptions) -> Path:
"""Prepare and validate output path for screenshot."""
_ensure_directory(options.output_dir)
explicit_format = _normalise_format(options.output_format) if options.output_format else None
inferred_format: Optional[str] = None
if options.output_path is not None:
path = options.output_path
if not path.is_absolute():
path = options.output_dir / path
suffix = path.suffix.lower()
if suffix:
inferred_format = _normalise_format(suffix[1:])
else:
stamp = time.strftime("%Y%m%d_%H%M%S")
filename = f"{_slugify_url(options.url)}_{stamp}"
path = options.output_dir / filename
final_format = explicit_format or inferred_format or "png"
if not path.suffix:
path = path.with_suffix(_format_suffix(final_format))
else:
current_suffix = path.suffix.lower()
expected = _format_suffix(final_format)
if current_suffix != expected:
path = path.with_suffix(expected)
options.output_format = final_format
return _unique_path(path)
def _capture_with_playwright(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
"""Capture screenshot using Playwright."""
playwright = None
browser = None
context = None
try:
log("Starting Playwright...", flush=True)
playwright = sync_playwright().start()
log("Launching Chromium browser...", flush=True)
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
browser = playwright.chromium.launch(
headless=headless,
args=["--disable-blink-features=AutomationControlled"],
)
log("Creating browser context...", flush=True)
context = browser.new_context(
user_agent=USER_AGENT,
viewport=DEFAULT_VIEWPORT,
ignore_https_errors=True,
)
page = context.new_page()
log(f"Navigating to {options.url}...", flush=True)
try:
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log("Waiting for article element...", flush=True)
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
if (v.poster) {
const img = document.createElement('img');
img.src = v.poster;
img.style.maxWidth = '100%';
img.style.borderRadius = '12px';
v.replaceWith(img);
}
});
"""
)
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
try:
_platform_preprocess(options.url, page, warnings)
except Exception:
pass
selectors = list(options.target_selectors or [])
if not selectors:
selectors = _selectors_for_url(options.url)
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
else:
log(f"Capturing full page to {destination}...", flush=True)
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
page.screenshot(full_page=True, **screenshot_kwargs)
else:
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
except Exception as exc:
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
finally:
log("Cleaning up browser resources...", flush=True)
with contextlib.suppress(Exception):
if context is not None:
context.close()
with contextlib.suppress(Exception):
if browser is not None:
browser.close()
with contextlib.suppress(Exception):
if playwright is not None:
playwright.stop()
log("Cleanup complete", flush=True)
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
"""Capture a screenshot for the given options."""
destination = _prepare_output_path(options)
warnings: List[str] = []
_capture_with_playwright(options, destination, warnings)
known_urls = _unique_preserve_order([options.url, *options.known_urls])
archive_urls: List[str] = []
if options.archive:
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_urls.extend(archives)
warnings.extend(archive_warnings)
if archives:
known_urls = _unique_preserve_order([*known_urls, *archives])
applied_tags = _unique_preserve_order(list(tag for tag in options.tags if tag.strip()))
return ScreenshotResult(
path=destination,
url=options.url,
tags_applied=applied_tags,
archive_urls=archive_urls,
known_urls=known_urls,
warnings=warnings,
)
# ============================================================================
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of URLs in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'file_path' field
- List of result objects to screenshot each
- Direct URL as string
Emits PipeObject-formatted results for each screenshot with:
- action: 'cmdlet:screen-shot'
- is_temp: True (screenshots are temporary artifacts)
- parent_id: hash of the original file/URL
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
from ._shared import parse_cmdlet_args
# Help check
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
storage_value = parsed.get("storage")
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
archive_enabled = parsed.get("archive", False)
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_urls = [str(url_arg)] if url_arg else []
# ========================================================================
# INPUT PROCESSING - Extract URLs from pipeline or command arguments
# ========================================================================
piped_results = normalize_result_input(result)
urls_to_process = []
# Extract URLs from piped results
if piped_results:
for item in piped_results:
url = None
if isinstance(item, dict):
url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target')
else:
url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None)
if url:
urls_to_process.append(str(url))
# Use positional arguments if no pipeline input
if not urls_to_process and positional_urls:
urls_to_process = positional_urls
if not urls_to_process:
log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr)
return 1
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
screenshot_dir: Optional[Path] = None
# Primary: Use --storage if provided (highest priority)
if storage_value:
try:
screenshot_dir = SharedArgs.resolve_storage(storage_value)
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Secondary: Use config-based resolver ONLY if --storage not provided
if screenshot_dir is None and resolve_output_dir is not None:
try:
screenshot_dir = resolve_output_dir(config)
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
except Exception:
pass
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
if screenshot_dir is None and config and config.get("outfile"):
try:
screenshot_dir = Path(config["outfile"]).expanduser()
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
_ensure_directory(screenshot_dir)
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
format_name = _normalise_format(format_value)
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
target_selectors = filtered_selectors if filtered_selectors else None
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS URLs AND CAPTURE SCREENSHOTS
# ========================================================================
for url in urls_to_process:
# Validate URL format
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
continue
try:
# Create screenshot with provided options
options = ScreenshotOptions(
url=url,
output_dir=screenshot_dir,
output_format=format_name,
archive=archive_enabled,
target_selectors=target_selectors,
prefer_platform_target=False,
wait_for_article=False,
full_page=True,
)
screenshot_result = _capture_screenshot(options)
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
if screenshot_result.archive_urls:
log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True)
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
pipe_obj = create_pipe_object_result(
source='screenshot',
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
title=f"Screenshot: {Path(screenshot_result.path).name}",
file_hash=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
extra={
'source_url': url,
'archive_urls': screenshot_result.archive_urls,
'known_urls': screenshot_result.known_urls,
'target': str(screenshot_result.path), # Explicit target for add-file
}
)
# Emit the result so downstream cmdlets (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
exit_code = 1
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
if not all_emitted:
log(f"No screenshots were successfully captured", file=sys.stderr)
return 1
# Log completion message
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a screenshot of a URL or file and mark as temporary artifact",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
aliases=["screenshot", "ss"],
args=[
CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.ARCHIVE, # Use shared archive argument
SharedArgs.STORAGE, # Use shared storage argument
],
details=[
"Take screenshots of URLs with optional archiving and element targeting.",
"Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
"",
"Arguments:",
" url URL to capture (optional if piped from pipeline)",
" --format FORMAT Output format: png (default), jpeg, or pdf",
" --selector SEL CSS selector for capturing specific element",
" --archive, -arch Archive URL to Wayback/Archive.today/Archive.ph",
" --storage LOCATION Storage destination: hydrus, local, 0x0, debrid, or ftp",
"",
"Examples:",
" download-data https://example.com | screen-shot --storage local",
" download-data https://twitter.com/user/status/123 | screen-shot --selector 'article[role=article]' --storage hydrus --archive",
" screen-shot https://example.com --format jpeg --storage 0x0 --archive",
]
)

351
cmdlets/search_file.py Normal file
View File

@@ -0,0 +1,351 @@
"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable
from fnmatch import fnmatchcase
from pathlib import Path
from dataclasses import dataclass, field
import json
import os
import sys
from helper.logger import log, debug
import shutil
import subprocess
from helper.file_storage import FileStorage
from helper.search_provider import get_provider, list_providers, SearchResult
from metadata import import_pending_sidecars
from . import register
from ._shared import Cmdlet, CmdletArg
import models
import pipeline as ctx
# Optional dependencies
try:
import mutagen # type: ignore
except ImportError: # pragma: no cover
mutagen = None # type: ignore
try:
from config import get_hydrus_url, resolve_output_dir
except Exception: # pragma: no cover
get_hydrus_url = None # type: ignore
resolve_output_dir = None # type: ignore
try:
from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError: # pragma: no cover
HydrusClient = None # type: ignore
HydrusRequestError = RuntimeError # type: ignore
try:
from helper.utils import sha256_file
except ImportError: # pragma: no cover
sha256_file = None # type: ignore
try:
from helper.utils_constant import mime_maps
except ImportError: # pragma: no cover
mime_maps = {} # type: ignore
# ============================================================================
# Data Classes (from helper/search.py)
# ============================================================================
@dataclass(slots=True)
class SearchRecord:
path: str
size_bytes: int | None = None
duration_seconds: str | None = None
tags: str | None = None
hash_hex: str | None = None
def as_dict(self) -> dict[str, str]:
payload: dict[str, str] = {"path": self.path}
if self.size_bytes is not None:
payload["size"] = str(self.size_bytes)
if self.duration_seconds:
payload["duration"] = self.duration_seconds
if self.tags:
payload["tags"] = self.tags
if self.hash_hex:
payload["hash"] = self.hash_hex
return payload
@dataclass
class ResultItem:
origin: str
title: str
detail: str
annotations: List[str]
target: str
media_kind: str = "other"
hash_hex: Optional[str] = None
columns: List[tuple[str, str]] = field(default_factory=list)
tag_summary: Optional[str] = None
duration_seconds: Optional[float] = None
size_bytes: Optional[int] = None
full_metadata: Optional[Dict[str, Any]] = None
tags: Optional[set[str]] = field(default_factory=set)
relationships: Optional[List[str]] = field(default_factory=list)
known_urls: Optional[List[str]] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"title": self.title,
}
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
payload["origin"] = self.origin
payload["target"] = self.target
payload["media_kind"] = self.media_kind
# Always include full_metadata if present (needed by download-data, etc)
# This is NOT for display, but for downstream processing
if self.full_metadata:
payload["full_metadata"] = self.full_metadata
# Include columns if defined (result renderer will use these for display)
if self.columns:
payload["columns"] = list(self.columns)
else:
# If no columns, include the detail for backwards compatibility
payload["detail"] = self.detail
payload["annotations"] = list(self.annotations)
# Include optional fields
if self.hash_hex:
payload["hash"] = self.hash_hex
if self.tag_summary:
payload["tags"] = self.tag_summary
if self.tags:
payload["tags_set"] = list(self.tags)
if self.relationships:
payload["relationships"] = self.relationships
if self.known_urls:
payload["known_urls"] = self.known_urls
return payload
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
"""Attach Title/Store columns for storage-origin results to keep CLI display compact."""
origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
if origin_value not in STORAGE_ORIGINS:
return payload
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
store_label = payload.get("origin") or payload.get("source") or origin_value
normalized = dict(payload)
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
return normalized
CMDLET = Cmdlet(
name="search-file",
summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
args=[
CmdletArg("query", description="Search query string"),
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
CmdletArg("type", description="Filter by type: audio, video, image, document"),
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
],
details=[
"Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
"Use -provider to search a specific source, or -storage to search file backends",
"Filter results by: tag, size, type, duration",
"Results can be piped to other commands",
"Examples:",
"search-file foo # Search all file backends",
"search-file -provider libgen 'python programming' # Search LibGen books",
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
"search-file 'music' -provider soulseek # Search Soulseek P2P",
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
"search-file song -storage hydrus -type audio # Search only Hydrus audio",
"search-file movie -tag action -provider debrid # Debrid with filters",
],
)
@register(["search-file", "search"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
args_list = [str(arg) for arg in (args or [])]
# Parse arguments
query = ""
tag_filters: List[str] = []
size_filter: Optional[Tuple[str, int]] = None
duration_filter: Optional[Tuple[str, float]] = None
type_filter: Optional[str] = None
storage_backend: Optional[str] = None
provider_name: Optional[str] = None
limit = 100
# Simple argument parsing
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
provider_name = args_list[i + 1].lower()
i += 2
elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
storage_backend = args_list[i + 1].lower()
i += 2
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
tag_filters.append(args_list[i + 1])
i += 2
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
limit = 100
i += 2
elif low in {"-type", "--type"} and i + 1 < len(args_list):
type_filter = args_list[i + 1].lower()
i += 2
elif not query and not arg.startswith("-"):
query = arg
i += 1
else:
i += 1
if not query:
log("Provide a search query", file=sys.stderr)
return 1
# Initialize worker for this search command
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
db = LocalLibraryDB(library_root)
db.insert_worker(
worker_id,
"search",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
)
try:
results_list = []
# Try to search using provider (libgen, soulseek, debrid, openlibrary)
if provider_name:
debug(f"[search_file] Attempting provider search with: {provider_name}")
provider = get_provider(provider_name, config)
if not provider:
log(f"Provider '{provider_name}' not available", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
debug(f"[search_file] Provider loaded, calling search with query: {query}")
search_result = provider.search(query, limit=limit)
debug(f"[search_file] Provider search returned {len(search_result)} results")
for item in search_result:
item_dict = item.to_dict()
results_list.append(item_dict)
ctx.emit(item_dict)
debug(f"[search_file] Emitted {len(results_list)} results")
# Write results to worker stdout
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
# Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
from helper.file_storage import FileStorage
storage = FileStorage(config=config or {})
backend_to_search = storage_backend or None
if backend_to_search:
# Check if requested backend is available
if backend_to_search == "hydrus":
from helper.hydrus import is_hydrus_available
if not is_hydrus_available(config or {}):
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
if not storage.supports_search(backend_to_search):
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
results = storage[backend_to_search].search(query, limit=limit)
else:
# Search all searchable backends, but skip hydrus if unavailable
from helper.hydrus import is_hydrus_available
hydrus_available = is_hydrus_available(config or {})
all_results = []
for backend_name in storage.list_searchable_backends():
# Skip hydrus if not available
if backend_name == "hydrus" and not hydrus_available:
continue
try:
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
break
except Exception as exc:
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
# Emit results and collect for workers table
if results:
for item in results:
if isinstance(item, dict):
normalized = _ensure_storage_columns(item)
results_list.append(normalized)
ctx.emit(normalized)
elif isinstance(item, ResultItem):
item_dict = item.to_dict()
results_list.append(item_dict)
ctx.emit(item_dict)
else:
item_dict = {"title": str(item)}
results_list.append(item_dict)
ctx.emit(item_dict)
# Write results to worker stdout
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:
log("No results found", file=sys.stderr)
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
finally:
# Always close the database connection
try:
db.close()
except Exception:
pass

325
cmdlets/worker.py Normal file
View File

@@ -0,0 +1,325 @@
"""Worker cmdlet: Display workers table in ResultTable format."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List
import json
import sys
from datetime import datetime, timezone
from . import register
from ._shared import Cmdlet, CmdletArg
import pipeline as ctx
from helper.logger import log
from config import get_local_storage_path
CMDLET = Cmdlet(
name=".worker",
summary="Display workers table in result table format.",
usage=".worker [status] [-limit N] [@N]",
args=[
CmdletArg("status", description="Filter by status: running, completed, error (default: all)"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("@N", description="Select worker by index (1-based) and display full logs"),
],
details=[
"- Shows all background worker tasks and their output",
"- Can filter by status: running, completed, error",
"- Search result stdout is captured from each worker",
"- Use @N to select a specific worker by index and display its full logs",
"Examples:",
".worker # Show all workers",
".worker running # Show running workers only",
".worker completed -limit 50 # Show 50 most recent completed workers",
".worker @3 # Show full logs for the 3rd worker",
".worker running @2 # Show full logs for the 2nd running worker",
],
)
@register([".worker", "worker", "workers"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Display workers table or show detailed logs for a specific worker."""
args_list = [str(arg) for arg in (args or [])]
selection_indices = ctx.get_last_selection()
selection_requested = bool(selection_indices) and isinstance(result, list) and len(result) > 0
# Parse arguments for list view
status_filter: str | None = None
limit = 100
clear_requested = False
worker_id_arg: str | None = None
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = max(1, int(args_list[i + 1]))
except ValueError:
limit = 100
i += 2
elif low in {"-id", "--id"} and i + 1 < len(args_list):
worker_id_arg = args_list[i + 1]
i += 2
elif low in {"-clear", "--clear"}:
clear_requested = True
i += 1
elif low in {"running", "completed", "error", "cancelled"}:
status_filter = low
i += 1
elif not arg.startswith("-"):
status_filter = low
i += 1
else:
i += 1
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
try:
from helper.local_library import LocalLibraryDB
db: LocalLibraryDB | None = None
try:
db = LocalLibraryDB(library_root)
if clear_requested:
count = db.clear_finished_workers()
log(f"Cleared {count} finished workers.")
return 0
if worker_id_arg:
worker = db.get_worker(worker_id_arg)
if worker:
events = []
try:
wid = worker.get("worker_id")
if wid and hasattr(db, "get_worker_events"):
events = db.get_worker_events(wid)
except Exception:
pass
_emit_worker_detail(worker, events)
return 0
else:
log(f"Worker not found: {worker_id_arg}", file=sys.stderr)
return 1
if selection_requested:
return _render_worker_selection(db, result)
return _render_worker_list(db, status_filter, limit)
finally:
if db:
db.close()
except Exception as exc:
log(f"Workers query failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
workers = db.get_all_workers(limit=limit)
if status_filter:
workers = [w for w in workers if str(w.get("status", "")).lower() == status_filter]
if not workers:
log("No workers found", file=sys.stderr)
return 0
for worker in workers:
started = worker.get("started_at", "")
ended = worker.get("completed_at", worker.get("last_updated", ""))
date_str = _extract_date(started)
start_time = _format_event_timestamp(started)
end_time = _format_event_timestamp(ended)
item = {
"columns": [
("Status", worker.get("status", "")),
("Pipe", _summarize_pipe(worker.get("pipe"))),
("Date", date_str),
("Start Time", start_time),
("End Time", end_time),
],
"__worker_metadata": worker,
"_selection_args": ["-id", worker.get("worker_id")]
}
ctx.emit(item)
return 0
def _render_worker_selection(db, selected_items: Any) -> int:
if not isinstance(selected_items, list):
log("Selection payload missing", file=sys.stderr)
return 1
emitted = False
for item in selected_items:
worker = _resolve_worker_record(db, item)
if not worker:
continue
events = []
try:
events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
except Exception:
events = []
_emit_worker_detail(worker, events)
emitted = True
if not emitted:
log("Selected rows no longer exist", file=sys.stderr)
return 1
return 0
def _resolve_worker_record(db, payload: Any) -> Dict[str, Any] | None:
if not isinstance(payload, dict):
return None
worker_data = payload.get("__worker_metadata")
worker_id = None
if isinstance(worker_data, dict):
worker_id = worker_data.get("worker_id")
else:
worker_id = payload.get("worker_id")
worker_data = None
if worker_id:
fresh = db.get_worker(worker_id)
if fresh:
return fresh
return worker_data if isinstance(worker_data, dict) else None
def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
# Parse stdout logs into rows
stdout_content = worker.get("stdout", "") or ""
# Try to parse lines if they follow the standard log format
# Format: YYYY-MM-DD HH:MM:SS - name - level - message
lines = stdout_content.splitlines()
for line in lines:
line = line.strip()
if not line:
continue
# Default values
timestamp = ""
level = "INFO"
message = line
# Try to parse standard format
try:
parts = line.split(" - ", 3)
if len(parts) >= 4:
# Full format
ts_str, _, lvl, msg = parts
timestamp = _format_event_timestamp(ts_str)
level = lvl
message = msg
elif len(parts) == 3:
# Missing name or level
ts_str, lvl, msg = parts
timestamp = _format_event_timestamp(ts_str)
level = lvl
message = msg
except Exception:
pass
item = {
"columns": [
("Time", timestamp),
("Level", level),
("Message", message)
]
}
ctx.emit(item)
# Also emit events if available and not redundant
# (For now, just focusing on stdout logs as requested)
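# Hedged sketch (not part of the original cmdlet): shows how a standard
# "YYYY-MM-DD HH:MM:SS - name - level - message" stdout line is expected to
# split with maxsplit=3, mirroring the parsing above. The sample line is made up.
def _example_parse_stdout_line():
    sample = "2025-11-25 20:09:33 - worker - INFO - download finished"
    ts_str, _name, level, message = sample.split(" - ", 3)
    return ts_str, level, message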
def _summarize_pipe(pipe_value: Any, limit: int = 60) -> str:
text = str(pipe_value or "").strip()
if not text:
return "(none)"
return text if len(text) <= limit else text[: limit - 3] + "..."
def _format_event_timestamp(raw_timestamp: Any) -> str:
dt = _parse_to_local(raw_timestamp)
if dt:
return dt.strftime("%H:%M:%S")
if not raw_timestamp:
return "--:--:--"
text = str(raw_timestamp)
if "T" in text:
time_part = text.split("T", 1)[1]
elif " " in text:
time_part = text.split(" ", 1)[1]
else:
time_part = text
return time_part[:8] if len(time_part) >= 8 else time_part
def _parse_to_local(timestamp_str: Any) -> datetime | None:
if not timestamp_str:
return None
text = str(timestamp_str).strip()
if not text:
return None
try:
# Check for T separator (Python isoformat - Local time)
if 'T' in text:
return datetime.fromisoformat(text)
# Check for space separator (SQLite CURRENT_TIMESTAMP - UTC)
# Format: YYYY-MM-DD HH:MM:SS
if ' ' in text:
# Assume UTC
dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone() # Convert to local
except Exception:
pass
return None
def _extract_date(raw_timestamp: Any) -> str:
dt = _parse_to_local(raw_timestamp)
if dt:
return dt.strftime("%m-%d-%y")
# Fallback
if not raw_timestamp:
return ""
text = str(raw_timestamp)
# Extract YYYY-MM-DD part
date_part = ""
if "T" in text:
date_part = text.split("T", 1)[0]
elif " " in text:
date_part = text.split(" ", 1)[0]
else:
date_part = text
# Convert YYYY-MM-DD to MM-DD-YY
try:
parts = date_part.split("-")
if len(parts) == 3:
year, month, day = parts
return f"{month}-{day}-{year[2:]}"
except Exception:
pass
return date_part
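# Hedged usage sketch (illustrative only): the two timestamp shapes the helpers
# above are written for. The literal values are examples, not real worker data.
def _example_timestamps():
    iso_local = "2025-11-25T20:09:33"    # Python isoformat, treated as local time
    sqlite_utc = "2025-11-25 04:09:33"   # SQLite CURRENT_TIMESTAMP, treated as UTC
    # _parse_to_local() converts the UTC form to local time; _extract_date() and
    # _format_event_timestamp() then render MM-DD-YY and HH:MM:SS strings.
    return _extract_date(sqlite_utc), _format_event_timestamp(iso_local)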

360
config.py Normal file
View File

@@ -0,0 +1,360 @@
"""Unified configuration helpers for downlow."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Optional
from helper.logger import log
DEFAULT_CONFIG_FILENAME = "config.json"
SCRIPT_DIR = Path(__file__).resolve().parent
_CONFIG_CACHE: Dict[str, Dict[str, Any]] = {}
def _make_cache_key(config_dir: Optional[Path], filename: str, actual_path: Optional[Path]) -> str:
if actual_path:
return str(actual_path.resolve())
base_dir = (config_dir or SCRIPT_DIR)
return str((base_dir / filename).resolve())
def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> Optional[Dict[str, Any]]:
"""Get a specific Hydrus instance config by name.
Supports both formats:
- New: config["storage"]["hydrus"][instance_name] = {"key": "...", "url": "..."}
- Old: config["HydrusNetwork"][instance_name] = {"key": "...", "url": "..."}
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
Dict with "key" and "url" keys, or None if not found
"""
# Try new format first
storage = config.get("storage", {})
if isinstance(storage, dict):
hydrus_config = storage.get("hydrus", {})
if isinstance(hydrus_config, dict):
instance = hydrus_config.get(instance_name)
if isinstance(instance, dict):
return instance
# Fall back to old format
hydrus_network = config.get("HydrusNetwork")
if not isinstance(hydrus_network, dict):
return None
instance = hydrus_network.get(instance_name)
if isinstance(instance, dict):
return instance
return None
def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
"""Get Hydrus access key for an instance.
Supports both old flat format and new nested format:
- Old: config["HydrusNetwork_Access_Key"]
- New: config["HydrusNetwork"][instance_name]["key"]
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
Access key string, or None if not found
"""
instance = get_hydrus_instance(config, instance_name)
key = instance.get("key") if instance else config.get("HydrusNetwork_Access_Key")
return str(key).strip() if key else None
def get_hydrus_url(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
"""Get Hydrus URL for an instance.
Supports both old flat format and new nested format:
- Old: config["HydrusNetwork_URL"] or constructed from IP/Port/HTTPS
- New: config["HydrusNetwork"][instance_name]["url"]
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
URL string, or None if not found
"""
instance = get_hydrus_instance(config, instance_name)
url = instance.get("url") if instance else config.get("HydrusNetwork_URL")
if url: # Check if not None and not empty
return str(url).strip()
# Build from IP/Port/HTTPS if not found
host = str(config.get("HydrusNetwork_IP") or "localhost").strip() or "localhost"
port = str(config.get("HydrusNetwork_Port") or "45869").strip()
scheme = "https" if str(config.get("HydrusNetwork_Use_HTTPS") or "").strip().lower() in {"1", "true", "yes", "on"} else "http"
authority = host if not (":" in host and not host.startswith("[")) else f"[{host}]"
return f"{scheme}://{authority}:{port}"
def resolve_output_dir(config: Dict[str, Any]) -> Path:
"""Resolve output directory from config with single source of truth.
Priority:
1. config["temp"] - explicitly set temp/output directory
2. config["outfile"] - fallback to outfile setting
3. Home/Videos - safe user directory fallback
Returns:
Path to output directory
"""
# First try explicit temp setting from config
temp_value = config.get("temp")
if temp_value:
try:
path = Path(str(temp_value)).expanduser()
# Verify we can access it (not a system directory with permission issues)
if path.exists() or path.parent.exists():
return path
except Exception:
pass
# Then try outfile setting
outfile_value = config.get("outfile")
if outfile_value:
try:
return Path(str(outfile_value)).expanduser()
except Exception:
pass
# Fallback to user's Videos directory
return Path.home() / "Videos"
def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]:
"""Get local storage path from config.
Supports both formats:
- New: config["storage"]["local"]["path"]
- Old: config["Local"]["path"]
Args:
config: Configuration dict
Returns:
Path object if found, None otherwise
"""
# Try new format first
storage = config.get("storage", {})
if isinstance(storage, dict):
local_config = storage.get("local", {})
if isinstance(local_config, dict):
path_str = local_config.get("path")
if path_str:
return Path(str(path_str)).expanduser()
# Fall back to old format
local_config = config.get("Local", {})
if isinstance(local_config, dict):
path_str = local_config.get("path")
if path_str:
return Path(str(path_str)).expanduser()
return None
def get_debrid_api_key(config: Dict[str, Any], service: str = "All-debrid") -> Optional[str]:
"""Get Debrid API key from config.
Supports both formats:
- New: config["storage"]["debrid"]["All-debrid"]
- Old: config["Debrid"]["All-debrid"]
Args:
config: Configuration dict
service: Service name (default: "All-debrid")
Returns:
API key string if found, None otherwise
"""
# Try new format first
storage = config.get("storage", {})
if isinstance(storage, dict):
debrid_config = storage.get("debrid", {})
if isinstance(debrid_config, dict):
api_key = debrid_config.get(service)
if api_key:  # not None and not empty
return str(api_key).strip()
# Fall back to old format
debrid_config = config.get("Debrid", {})
if isinstance(debrid_config, dict):
api_key = debrid_config.get(service)
if api_key:  # not None and not empty
return str(api_key).strip()
return None
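# Hedged sketch: both config shapes described in the docstring resolve to the
# same key. The API key string is a placeholder.
def _example_debrid_key_lookup():
    new_style = {"storage": {"debrid": {"All-debrid": "EXAMPLE_KEY"}}}
    old_style = {"Debrid": {"All-debrid": "EXAMPLE_KEY"}}
    return get_debrid_api_key(new_style), get_debrid_api_key(old_style)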
def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[Dict[str, str]]:
"""Get provider credentials (email/password) from config.
Supports both formats:
- New: config["provider"][provider] = {"email": "...", "password": "..."}
- Old: config[provider.capitalize()] = {"email": "...", "password": "..."}
Args:
config: Configuration dict
provider: Provider name (e.g., "openlibrary", "soulseek")
Returns:
Dict with credentials if found, None otherwise
"""
# Try new format first
provider_config = config.get("provider", {})
if isinstance(provider_config, dict):
creds = provider_config.get(provider.lower(), {})
if isinstance(creds, dict) and creds:
return creds
# Fall back to old format (capitalized key)
old_key_map = {
"openlibrary": "OpenLibrary",
"archive": "Archive",
"soulseek": "Soulseek",
}
old_key = old_key_map.get(provider.lower())
if old_key:
creds = config.get(old_key, {})
if isinstance(creds, dict) and creds:
return creds
return None
def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = None) -> Optional[Path]:
value = config.get("cookies") or config.get("Cookies_Path")
if value:
candidate = Path(str(value)).expanduser()
if candidate.is_file():
return candidate
base_dir = script_dir or SCRIPT_DIR
default_path = base_dir / "cookies.txt"
if default_path.is_file():
return default_path
return None
def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
value = config.get("download_debug_log")
if not value:
return None
path = Path(str(value)).expanduser()
if not path.is_absolute():
path = Path.cwd() / path
return path
def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
base_dir = config_dir or SCRIPT_DIR
config_path = base_dir / filename
cache_key = _make_cache_key(config_dir, filename, config_path)
if cache_key in _CONFIG_CACHE:
return _CONFIG_CACHE[cache_key]
try:
raw = config_path.read_text(encoding="utf-8")
except FileNotFoundError:
# Try alternate filename if default not found
if filename == DEFAULT_CONFIG_FILENAME:
alt_path = base_dir / "downlow.json"
try:
raw = alt_path.read_text(encoding="utf-8")
config_path = alt_path
cache_key = _make_cache_key(config_dir, filename, alt_path)
except FileNotFoundError:
_CONFIG_CACHE[cache_key] = {}
return {}
except OSError as exc:
log(f"Failed to read {alt_path}: {exc}")
_CONFIG_CACHE[cache_key] = {}
return {}
else:
_CONFIG_CACHE[cache_key] = {}
return {}
except OSError as exc:
log(f"Failed to read {config_path}: {exc}")
_CONFIG_CACHE[cache_key] = {}
return {}
raw = raw.strip()
if not raw:
_CONFIG_CACHE[cache_key] = {}
return {}
try:
data = json.loads(raw)
except json.JSONDecodeError as exc:
log(f"Invalid JSON in {config_path}: {exc}")
_CONFIG_CACHE[cache_key] = {}
return {}
if not isinstance(data, dict):
log(f"Expected object in {config_path}, got {type(data).__name__}")
_CONFIG_CACHE[cache_key] = {}
return {}
_CONFIG_CACHE[cache_key] = data
return data
def reload_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
cache_key = _make_cache_key(config_dir, filename, None)
_CONFIG_CACHE.pop(cache_key, None)
return load_config(config_dir=config_dir, filename=filename)
def clear_config_cache() -> None:
_CONFIG_CACHE.clear()
def save_config(
config: Dict[str, Any],
config_dir: Optional[Path] = None,
filename: str = DEFAULT_CONFIG_FILENAME,
) -> None:
base_dir = config_dir or SCRIPT_DIR
config_path = base_dir / filename
# Load existing config to preserve keys that aren't being changed
try:
existing_raw = config_path.read_text(encoding="utf-8")
existing_data = json.loads(existing_raw.strip())
if isinstance(existing_data, dict):
# Merge: existing config as base, then overlay with new config
merged = existing_data.copy()
merged.update(config)
config = merged
except (FileNotFoundError, OSError, json.JSONDecodeError):
# File doesn't exist or is invalid, use provided config as-is
pass
try:
config_path.write_text(
json.dumps(config, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
encoding="utf-8",
)
except OSError as exc:
raise RuntimeError(f"Failed to write config to {config_path}: {exc}") from exc
cache_key = _make_cache_key(config_dir, filename, config_path)
_CONFIG_CACHE[cache_key] = config
def load() -> Dict[str, Any]:
"""Return the parsed downlow configuration."""
return load_config()
def save(config: Dict[str, Any]) -> None:
"""Persist *config* back to disk."""
save_config(config)
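# Hedged usage sketch: load() caches per resolved config path, so repeated calls
# are cheap; reload_config() drops the cache entry and re-reads the file. Updating
# a single key via save() preserves unrelated keys because save_config() merges
# over the existing file contents. The "temp" value below is illustrative.
def _example_config_roundtrip():
    cfg = load()
    cfg["temp"] = str(Path.home() / "Downloads")
    save(cfg)
    return reload_config()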

92
helper/__init__.py Normal file
View File

@@ -0,0 +1,92 @@
"""Helper modules for the downlow mpv integration."""
from . import hydrus as _hydrus
from . import download as _download
from . import tasks as _tasks
from . import utils as _utils
try: # Optional dependency on Playwright
from . import webshot as _webshot
except Exception as exc: # pragma: no cover - surfaced when Playwright is missing
_webshot = None # type: ignore
ScreenshotError = None # type: ignore[assignment]
ScreenshotOptions = None # type: ignore[assignment]
ScreenshotResult = None # type: ignore[assignment]
capture_screenshot = None # type: ignore[assignment]
ScreenshotImportError = exc # type: ignore[assignment]
else:
ScreenshotError = _webshot.ScreenshotError
ScreenshotOptions = _webshot.ScreenshotOptions
ScreenshotResult = _webshot.ScreenshotResult
capture_screenshot = _webshot.capture_screenshot
ScreenshotImportError = None
# CBOR utilities
decode_cbor = _utils.decode_cbor
jsonify = _utils.jsonify
# General utilities
CHUNK_SIZE = _utils.CHUNK_SIZE
ensure_directory = _utils.ensure_directory
unique_path = _utils.unique_path
download_hydrus_file = _hydrus.download_hydrus_file
sanitize_metadata_value = _utils.sanitize_metadata_value
unique_preserve_order = _utils.unique_preserve_order
sha256_file = _utils.sha256_file
create_metadata_sidecar = _utils.create_metadata_sidecar
create_tags_sidecar = _utils.create_tags_sidecar
# Format utilities
format_bytes = _utils.format_bytes
format_duration = _utils.format_duration
format_timestamp = _utils.format_timestamp
format_metadata_value = _utils.format_metadata_value
# Link utilities
extract_link = _utils.extract_link
extract_link_from_args = _utils.extract_link_from_args
extract_link_from_result = _utils.extract_link_from_result
get_api_key = _utils.get_api_key
add_direct_link_to_result = _utils.add_direct_link_to_result
# URL policy utilities
resolve_url_policy = _utils.resolve_url_policy
UrlPolicy = _utils.UrlPolicy
# Download utilities
DownloadOptions = _download.DownloadOptions
DownloadError = _download.DownloadError
DownloadMediaResult = _download.DownloadMediaResult
download_media = _download.download_media
is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp
probe_url = _download.probe_url
# Hydrus utilities
hydrus_request = _hydrus.hydrus_request
hydrus_export = _hydrus.hydrus_export
HydrusClient = _hydrus.HydrusClient
HydrusRequestError = _hydrus.HydrusRequestError
connect_ipc = _tasks.connect_ipc
ipc_sender = _tasks.ipc_sender
__all__ = [
'decode_cbor',
'jsonify',
'CHUNK_SIZE',
'ensure_directory',
'unique_path',
'download_hydrus_file',
'sanitize_metadata_value',
'unique_preserve_order',
'sha256_file',
'resolve_url_policy',
'UrlPolicy',
'ScreenshotError',
'ScreenshotOptions',
'ScreenshotResult',
'capture_screenshot',
'ScreenshotImportError',
'DownloadOptions',
'DownloadError',
'DownloadMediaResult',
'download_media',
'is_url_supported_by_ytdlp',
'probe_url',
'HydrusClient',
'HydrusRequestError',
'hydrus_request',
'hydrus_export',
'connect_ipc',
'ipc_sender',
]

130
helper/adjective.json Normal file
View File

@@ -0,0 +1,130 @@
{
"Occult": [
"esoterica",
"ritual",
"alchemy",
"magic",
"hermetic",
"divination",
"grimoires",
"symbolism",
"ceremony"
],
"Philosophy": [
"ethics",
"metaphysics",
"epistemology",
"logic",
"existentialism",
"stoicism",
"phenomenology",
"dialectic",
"aesthetics"
],
"Mystery": [
"investigation",
"crime",
"detective",
"noir",
"thriller",
"suspense",
"conspiracy",
"whodunit",
"clues"
],
"Religion": [
"scripture",
"theology",
"worship",
"ritual",
"doctrine",
"faith",
"tradition",
"liturgy",
"sacred"
],
"Mythology": [
"gods",
"creation",
"heroes",
"legends",
"folklore",
"pantheon",
"epic",
"mythic",
"archetype"
],
"Science": [
"research",
"experiment",
"theory",
"biology",
"physics",
"chemistry",
"data",
"method",
"innovation"
],
"Art": [
"visual",
"painting",
"sculpture",
"modernism",
"technique",
"studio",
"curation",
"expression",
"composition"
],
"Literature": [
"fiction",
"poetry",
"novel",
"criticism",
"narrative",
"prose",
"drama",
"canonical",
"translation"
],
"History": [
"archaeology",
"chronicle",
"period",
"empire",
"revolution",
"archive",
"heritage",
"historiography",
"timeline"
],
"Psychology": [
"cognition",
"behavior",
"therapy",
"development",
"neuroscience",
"personality",
"perception",
"emotion",
"motivation"
],
"gnostic": [
"religion",
"scripture",
"gnostic",
"gospel",
"wisdom",
"spirituality",
"ancient",
"philosophy",
"esoteric",
"mysticism",
"mythology",
"theology",
"sacred",
"divine",
"apocrapha",
"gnosticism"
]
}

829
helper/alldebrid.py Normal file
View File

@@ -0,0 +1,829 @@
"""AllDebrid API integration for converting free links to direct downloads.
AllDebrid is a debrid service that unlocks free file hosters and provides direct download links.
API docs: https://docs.alldebrid.com/#general-informations
"""
from __future__ import annotations
import json
import sys
from helper.logger import log, debug
import time
import logging
from pathlib import Path
from typing import Any, Dict, Optional, Set, List, Sequence
from urllib.parse import urlencode, urlparse
from .http_client import HTTPClient
logger = logging.getLogger(__name__)
class AllDebridError(Exception):
"""Raised when AllDebrid API request fails."""
pass
# Cache for supported hosters (domain -> host info)
_SUPPORTED_HOSTERS_CACHE: Optional[Dict[str, Dict[str, Any]]] = None
_CACHE_TIMESTAMP: float = 0
_CACHE_DURATION: float = 3600 # 1 hour
class AllDebridClient:
"""Client for AllDebrid API."""
# Try both v4 and v3 APIs
BASE_URLS = [
"https://api.alldebrid.com/v4",
"https://api.alldebrid.com/v3",
]
def __init__(self, api_key: str):
"""Initialize AllDebrid client with API key.
Args:
api_key: AllDebrid API key from config
"""
self.api_key = api_key.strip()
if not self.api_key:
raise AllDebridError("AllDebrid API key is empty")
self.base_url = self.BASE_URLS[0] # Start with v4
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
"""Make a request to AllDebrid API.
Args:
endpoint: API endpoint (e.g., "user/profile", "link/unlock")
params: Query parameters
Returns:
Parsed JSON response
Raises:
AllDebridError: If request fails or API returns error
"""
if params is None:
params = {}
# Add API key to params
params['apikey'] = self.api_key
url = f"{self.base_url}/{endpoint}"
query_string = urlencode(params)
full_url = f"{url}?{query_string}"
logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...")
try:
# Pass timeout to HTTPClient init, not to get()
with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client:
try:
response = client.get(full_url)
response.raise_for_status()
except Exception as req_err:
# Log detailed error info
logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True)
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
try:
error_body = req_err.response.content.decode('utf-8') # type: ignore
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
except Exception:
pass
raise
data = json.loads(response.content.decode('utf-8'))
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
# Check for API errors
if data.get('status') == 'error':
error_msg = data.get('error', {}).get('message', 'Unknown error')
logger.error(f"[AllDebrid] API error: {error_msg}")
raise AllDebridError(f"AllDebrid API error: {error_msg}")
return data
except AllDebridError:
raise
except Exception as exc:
error_msg = f"AllDebrid request failed: {exc}"
logger.error(f"[AllDebrid] {error_msg}", exc_info=True)
raise AllDebridError(error_msg)
def unlock_link(self, link: str) -> Optional[str]:
"""Unlock a restricted link and get direct download URL.
Args:
link: Restricted link to unlock
Returns:
Direct download URL, or None if already unrestricted
Raises:
AllDebridError: If unlock fails
"""
if not link.startswith(('http://', 'https://')):
raise AllDebridError(f"Invalid URL: {link}")
try:
response = self._request('link/unlock', {'link': link})
# Check if unlock was successful
if response.get('status') == 'success':
data = response.get('data', {})
# AllDebrid returns the download info in 'link' field
if 'link' in data:
return data['link']
# Alternative: check for 'file' field
if 'file' in data:
return data['file']
# If no direct link, return the input link
return link
return None
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to unlock link: {exc}")
def check_host(self, hostname: str) -> Dict[str, Any]:
"""Check if a host is supported by AllDebrid.
Args:
hostname: Hostname to check (e.g., "uploadhaven.com")
Returns:
Host information dict with support status
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('host', {'name': hostname})
if response.get('status') == 'success':
return response.get('data', {})
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to check host: {exc}")
def get_user_info(self) -> Dict[str, Any]:
"""Get current user account information.
Returns:
User information dict
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('user/profile')
if response.get('status') == 'success':
return response.get('data', {})
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get user info: {exc}")
def get_supported_hosters(self) -> Dict[str, Dict[str, Any]]:
"""Get list of all supported hosters from AllDebrid API.
Returns:
Dict mapping domain to host info (status, name, etc)
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('hosts/domains')
if response.get('status') == 'success':
data = response.get('data', {})
# The API returns hosts keyed by domain
return data if isinstance(data, dict) else {}
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get supported hosters: {exc}")
def magnet_add(self, magnet_uri: str) -> Dict[str, Any]:
"""Submit a magnet link or torrent hash to AllDebrid for processing.
AllDebrid will download the torrent content and store it in the account.
Processing time varies based on torrent size and availability.
Args:
magnet_uri: Magnet URI (magnet:?xt=urn:btih:...) or torrent hash
Returns:
Dict with magnet info:
- id: Magnet ID (int) - needed for status checks
- name: Torrent name
- hash: Torrent hash
- size: Total file size (bytes)
- ready: Boolean - True if already available
Raises:
AllDebridError: If submit fails (requires premium, invalid magnet, etc)
"""
if not magnet_uri:
raise AllDebridError("Magnet URI is empty")
try:
# API endpoint: POST /v4/magnet/upload
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
if magnets and len(magnets) > 0:
magnet_info = magnets[0]
# Check for errors in the magnet response
if 'error' in magnet_info:
error = magnet_info['error']
error_msg = error.get('message', 'Unknown error')
raise AllDebridError(f"Magnet error: {error_msg}")
return magnet_info
raise AllDebridError("No magnet data in response")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to submit magnet: {exc}")
def magnet_status(self, magnet_id: int, include_files: bool = False) -> Dict[str, Any]:
"""Get status of a magnet currently being processed or stored.
Status codes:
0-3: Processing (in queue, downloading, compressing, uploading)
4: Ready (files available for download)
5-15: Error (upload failed, not downloaded in 20min, too big, etc)
Args:
magnet_id: Magnet ID from magnet_add()
include_files: If True, includes file list in response
Returns:
Dict with status info:
- id: Magnet ID
- filename: Torrent name
- size: Total size (bytes)
- status: Human-readable status
- statusCode: Numeric code (0-15)
- downloaded: Bytes downloaded so far
- uploaded: Bytes uploaded so far
- seeders: Number of seeders
- downloadSpeed: Current speed (bytes/sec)
- uploadSpeed: Current speed (bytes/sec)
- files: (optional) Array of file objects when include_files=True
Each file: {n: name, s: size, l: download_link}
Raises:
AllDebridError: If status check fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
# Use v4.1 endpoint for better response format
# Temporarily override base_url for this request
old_base = self.base_url
self.base_url = "https://api.alldebrid.com/v4.1"
try:
response = self._request('magnet/status', {'id': str(magnet_id)})
finally:
self.base_url = old_base
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', {})
# Handle both list and dict responses
if isinstance(magnets, list) and len(magnets) > 0:
return magnets[0]
elif isinstance(magnets, dict) and magnets:
return magnets
raise AllDebridError(f"No magnet found with ID {magnet_id}")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet status: {exc}")
def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
"""Get live status of a magnet using delta sync mode.
The live mode endpoint provides real-time progress by only sending
deltas (changed fields) instead of full status on each call. This
reduces bandwidth and server load compared to regular polling.
Note: The "live" designation refers to the delta-sync mode where you
maintain state locally and apply diffs from the API, not a streaming
endpoint. Regular magnet_status() polling is simpler for single magnets.
Docs: https://docs.alldebrid.com/#get-status-live-mode
Args:
magnet_id: Magnet ID from magnet_add()
session: Session ID (use same ID across multiple calls). If None, will query current status
counter: Counter value from previous response (starts at 0)
Returns:
Dict with magnet status. May contain only changed fields if counter > 0.
For single-magnet tracking, use magnet_status() instead.
Raises:
AllDebridError: If request fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
# For single magnet queries, just use regular endpoint with ID
# The "live mode" with session/counter is for multi-magnet dashboards
# where bandwidth savings from diffs matter
response = self._request('magnet/status', {'id': str(magnet_id)})
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
# Handle list response
if isinstance(magnets, list) and len(magnets) > 0:
return magnets[0]
raise AllDebridError(f"No magnet found with ID {magnet_id}")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet live status: {exc}")
def magnet_links(self, magnet_ids: list) -> Dict[str, Any]:
"""Get files and download links for one or more magnets.
Use this after magnet_status shows statusCode == 4 (Ready).
Returns the file tree structure with direct download links.
Args:
magnet_ids: List of magnet IDs to get files for
Returns:
Dict mapping magnet_id (as string) -> magnet_info:
- id: Magnet ID
- files: Array of file/folder objects
File: {n: name, s: size, l: direct_download_link}
Folder: {n: name, e: [sub_items]}
Raises:
AllDebridError: If request fails
"""
if not magnet_ids:
raise AllDebridError("No magnet IDs provided")
try:
# Build one id[...] query parameter per magnet ID
params = {}
for i, magnet_id in enumerate(magnet_ids):
params[f'id[{i}]'] = str(magnet_id)
response = self._request('magnet/files', params)
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
# Convert list to dict keyed by ID (as string) for easier access
result = {}
for magnet_info in magnets:
magnet_id = magnet_info.get('id')
if magnet_id:
result[str(magnet_id)] = magnet_info
return result
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet files: {exc}")
def instant_available(self, magnet_hash: str) -> Optional[List[Dict[str, Any]]]:
"""Check if magnet is available for instant streaming without downloading.
AllDebrid's "instant" feature checks if a magnet can be streamed directly
without downloading all the data. Returns available video/audio files.
Args:
magnet_hash: Torrent hash (with or without magnet: prefix)
Returns:
List of available files for streaming, or None if not available
Each file: {n: name, s: size, e: extension, t: type}
Returns empty list if torrent not found or not available
Raises:
AllDebridError: If API request fails
"""
try:
# Parse magnet hash if needed
if magnet_hash.startswith('magnet:'):
# Extract hash from magnet URI
import re
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
if not match:
return None
hash_value = match.group(1)
else:
hash_value = magnet_hash.strip()
if not hash_value or len(hash_value) < 32:
return None
response = self._request('magnet/instant', {'magnet': hash_value})
if response.get('status') == 'success':
data = response.get('data', {})
# Returns 'files' array if available, or empty
return data.get('files', [])
# Not available is not an error, just return empty list
return []
except AllDebridError:
raise
except Exception as exc:
logger.debug(f"[AllDebrid] instant_available check failed: {exc}")
return None
def magnet_delete(self, magnet_id: int) -> bool:
"""Delete a magnet from the AllDebrid account.
Args:
magnet_id: Magnet ID to delete
Returns:
True if deletion was successful
Raises:
AllDebridError: If deletion fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
response = self._request('magnet/delete', {'id': str(magnet_id)})
if response.get('status') == 'success':
return True
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to delete magnet: {exc}")
def _get_cached_supported_hosters(api_key: str) -> Set[str]:
"""Get cached list of supported hoster domains.
Uses AllDebrid API to fetch the list once per hour,
caching the result to avoid repeated API calls.
Args:
api_key: AllDebrid API key
Returns:
Set of supported domain names (lowercased)
"""
global _SUPPORTED_HOSTERS_CACHE, _CACHE_TIMESTAMP
now = time.time()
# Return cached result if still valid
if _SUPPORTED_HOSTERS_CACHE is not None and (now - _CACHE_TIMESTAMP) < _CACHE_DURATION:
return set(_SUPPORTED_HOSTERS_CACHE.keys())
# Fetch fresh list from API
try:
client = AllDebridClient(api_key)
hosters_dict = client.get_supported_hosters()
if hosters_dict:
# API returns: hosts (list), streams (list), redirectors (list)
# Combine all into a single set
all_domains: Set[str] = set()
# Add hosts
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
all_domains.update(hosters_dict['hosts'])
# Add streams
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
all_domains.update(hosters_dict['streams'])
# Add redirectors
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
all_domains.update(hosters_dict['redirectors'])
# Cache as dict for consistency
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
_CACHE_TIMESTAMP = now
if all_domains:
debug(f"✓ Cached {len(all_domains)} supported hosters")
return all_domains
except Exception as exc:
log(f"⚠ Failed to fetch supported hosters: {exc}", file=sys.stderr)
# Return any cached hosters even if expired
if _SUPPORTED_HOSTERS_CACHE:
return set(_SUPPORTED_HOSTERS_CACHE.keys())
# Fallback: empty set if no cache available
return set()
def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
"""Check if a URL is from a hoster that AllDebrid can unlock.
Intelligently queries the AllDebrid API to detect if the URL is
from a supported restricted hoster.
Args:
url: URL to check
api_key: AllDebrid API key
Returns:
True if URL is from a supported restrictable hoster
"""
if not url or not api_key:
return False
try:
# Extract domain from URL
parsed = urlparse(url)
domain = parsed.netloc.lower()
# Remove www. prefix for comparison
if domain.startswith('www.'):
domain = domain[4:]
# Get supported hosters (cached)
supported = _get_cached_supported_hosters(api_key)
if not supported:
# API check failed, fall back to manual detection
# Check for common restricted hosters
common_hosters = {
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
'datafile.com', 'rapidfile.io', 'nitroflare.com',
'1fichier.com', 'mega.nz', 'mediafire.com'
}
return any(host in url.lower() for host in common_hosters)
# Check if domain is in supported list
# Need to check exact match and with/without www
return domain in supported or f"www.{domain}" in supported
except Exception as exc:
log(f"⚠ Hoster detection failed: {exc}", file=sys.stderr)
return False
def convert_link_with_debrid(link: str, api_key: str) -> Optional[str]:
"""Convert a restricted link to a direct download URL using AllDebrid.
Args:
link: Restricted link
api_key: AllDebrid API key
Returns:
Direct download URL, or original link if already unrestricted
"""
if not api_key:
return None
try:
client = AllDebridClient(api_key)
direct_link = client.unlock_link(link)
if direct_link and direct_link != link:
debug(f"✓ Converted link: {link[:60]}... → {direct_link[:60]}...")
return direct_link
return None
except AllDebridError as exc:
log(f"⚠ Failed to convert link: {exc}", file=sys.stderr)
return None
except Exception as exc:
log(f"⚠ Unexpected error: {exc}", file=sys.stderr)
return None
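# Hedged sketch: typical hoster check + unlock flow using the helpers above.
# The URL is a placeholder; a real call requires a valid AllDebrid API key.
def _example_unlock_flow(api_key: str):
    url = "https://1fichier.com/?exampleid"  # illustrative restricted link
    if is_link_restrictable_hoster(url, api_key):
        return convert_link_with_debrid(url, api_key)
    return None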
def is_magnet_link(uri: str) -> bool:
"""Check if a URI is a magnet link.
Magnet links start with 'magnet:?xt=urn:btih:' or just 'magnet:'
Args:
uri: URI to check
Returns:
True if URI is a magnet link
"""
if not uri:
return False
return uri.lower().startswith('magnet:')
def is_torrent_hash(text: str) -> bool:
"""Check if text looks like a torrent hash (40 or 64 hex characters).
Common formats:
- Info hash v1: 40 hex chars (SHA-1)
- Info hash v2: 64 hex chars (SHA-256)
Args:
text: Text to check
Returns:
True if text matches torrent hash format
"""
if not text or not isinstance(text, str):
return False
text = text.strip()
# Check if it's 40 hex chars (SHA-1) or 64 hex chars (SHA-256)
if len(text) not in (40, 64):
return False
try:
# Try to parse as hex
int(text, 16)
return True
except ValueError:
return False
def is_torrent_file(path: str) -> bool:
"""Check if a file path is a .torrent file.
Args:
path: File path to check
Returns:
True if file has .torrent extension
"""
if not path:
return False
return path.lower().endswith('.torrent')
def parse_magnet_or_hash(uri: str) -> Optional[str]:
"""Parse a magnet URI or hash into a format for AllDebrid API.
AllDebrid's magnet/upload endpoint accepts:
- Full magnet URIs: magnet:?xt=urn:btih:...
- Info hashes: 40 or 64 hex characters
Args:
uri: Magnet URI or hash
Returns:
Normalized input for AllDebrid API, or None if invalid
"""
if not uri:
return None
uri = uri.strip()
# Already a magnet link - just return it
if is_magnet_link(uri):
return uri
# Check if it's a valid hash
if is_torrent_hash(uri):
return uri
# Not a recognized format
return None
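# Hedged sketch of the magnet workflow documented above: submit, poll until
# statusCode == 4 (Ready), fetch links, then delete. The magnet URI is supplied
# by the caller and the 5-second polling interval is an arbitrary choice.
def _example_magnet_workflow(api_key: str, magnet_uri: str):
    normalized = parse_magnet_or_hash(magnet_uri)
    if not normalized:
        raise AllDebridError("Not a magnet link or torrent hash")
    client = AllDebridClient(api_key)
    info = client.magnet_add(normalized)
    magnet_id = int(info["id"])
    while True:
        status = client.magnet_status(magnet_id)
        code = status.get("statusCode", 0)
        if code == 4:
            break  # Ready: files available for download
        if code >= 5:
            raise AllDebridError(f"Magnet failed: {status.get('status')}")
        time.sleep(5)
    magnet_files = client.magnet_links([magnet_id]).get(str(magnet_id), {})
    client.magnet_delete(magnet_id)
    return magnet_files.get("files", [])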
# ============================================================================
# Cmdlet: unlock_link
# ============================================================================
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Unlock a restricted link using AllDebrid.
Converts free hosters and restricted links to direct download URLs.
Usage:
unlock-link <link>
unlock-link # Uses URL from pipeline result
Requires:
- AllDebrid API key in config under Debrid.All-debrid
Args:
result: Pipeline result object
args: Command arguments
config: Configuration dictionary
Returns:
0 on success, 1 on failure
"""
try:
from .link_utils import (
extract_link,
get_api_key,
add_direct_link_to_result,
)
except ImportError as e:
log(f"Required modules unavailable: {e}", file=sys.stderr)
return 1
# Get link from args or result
link = extract_link(result, args)
if not link:
log("No valid URL provided", file=sys.stderr)
return 1
# Get AllDebrid API key from config
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
if not api_key:
log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr)
return 1
# Try to unlock the link
debug(f"Unlocking: {link}")
direct_link = convert_link_with_debrid(link, api_key)
if direct_link:
debug(f"✓ Direct link: {direct_link}")
# Update result with direct link
add_direct_link_to_result(result, direct_link, link)
# Return the updated result via pipeline context
# Note: The cmdlet wrapper will handle emitting to pipeline
return 0
else:
log(f"❌ Failed to unlock link or already unrestricted", file=sys.stderr)
return 1
# ============================================================================
# Cmdlet Registration
# ============================================================================
def _register_unlock_link():
"""Register unlock-link command with cmdlet registry if available."""
try:
from cmdlets import register
@register(["unlock-link"])
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Wrapper to make unlock_link_cmdlet available as cmdlet."""
import pipeline as ctx
ret_code = unlock_link_cmdlet(result, args, config)
# If successful, emit the result
if ret_code == 0:
ctx.emit(result)
return ret_code
return unlock_link_wrapper
except ImportError:
# If cmdlets module not available, just return None
return None
# Register when module is imported
_unlock_link_registration = _register_unlock_link()

567
helper/archive_client.py Normal file
View File

@@ -0,0 +1,567 @@
"""Archive.org API client for borrowing and downloading books.
This module provides low-level functions for interacting with Archive.org:
- Authentication (login, credential management)
- Borrowing (loan, return_loan)
- Book metadata extraction (get_book_infos, get_book_metadata)
- Image downloading and deobfuscation
- PDF creation with metadata
Used by unified_book_downloader.py for the borrowing workflow.
"""
from __future__ import annotations
import base64
import hashlib
import logging
import os
import re
import sys
import time
from concurrent import futures
from typing import Any, Dict, List, Optional, Sequence, Tuple
import requests
from helper.logger import log, debug
try:
from Crypto.Cipher import AES # type: ignore
from Crypto.Util import Counter # type: ignore
except ImportError:
AES = None # type: ignore
Counter = None # type: ignore
try:
from tqdm import tqdm # type: ignore
except ImportError:
tqdm = None # type: ignore
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
"""Get OpenLibrary/Archive.org email and password from config.
Supports both formats:
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
- Old: {"Archive": {"email": "...", "password": "..."}}
{"archive_org_email": "...", "archive_org_password": "..."}
Returns: (email, password) tuple, each can be None
"""
if not isinstance(config, dict):
return None, None
# Try new format first
provider_config = config.get("provider", {})
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary", {})
if isinstance(openlibrary_config, dict):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return email, password
# Try old nested format
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return email, password
# Fall back to old flat format
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return email, password
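# Hedged sketch: the three accepted credential layouts described above. The
# email/password values are placeholders.
def _example_credential_lookup():
    new_style = {"provider": {"openlibrary": {"email": "user@example.com", "password": "secret"}}}
    old_nested = {"Archive": {"email": "user@example.com", "password": "secret"}}
    old_flat = {"archive_org_email": "user@example.com", "archive_org_password": "secret"}
    return [credential_openlibrary(cfg) for cfg in (new_style, old_nested, old_flat)]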
def display_error(response: requests.Response, message: str) -> None:
"""Display error and exit."""
log(message, file=sys.stderr)
log(response.text, file=sys.stderr)
sys.exit(1)
def login(email: str, password: str) -> requests.Session:
"""Login to archive.org.
Args:
email: Archive.org email
password: Archive.org password
Returns:
Authenticated requests.Session
Raises:
SystemExit on login failure
"""
session = requests.Session()
session.get("https://archive.org/account/login", timeout=30)
data = {"username": email, "password": password}
response = session.post("https://archive.org/account/login", data=data, timeout=30)
if "bad_login" in response.text:
log("Invalid credentials!", file=sys.stderr)
sys.exit(1)
if "Successful login" in response.text:
debug("Successful login")
return session
display_error(response, "[-] Error while login:")
sys.exit(1) # Unreachable but satisfies type checker
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
"""Borrow a book from archive.org (14-day loan).
Args:
session: Authenticated requests.Session from login()
book_id: Archive.org book identifier (e.g., 'ia_book_id')
verbose: Whether to log messages
Returns:
Session with active loan
Raises:
SystemExit on loan failure
"""
data = {"action": "grant_access", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
debug("This book doesn't need to be borrowed")
return session
display_error(response, "Something went wrong when trying to borrow the book.")
except Exception:
display_error(response, "The book cannot be borrowed")
data["action"] = "create_token"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if "token" in response.text:
if verbose:
debug("Successful loan")
return session
display_error(response, "Something went wrong when trying to borrow the book.")
sys.exit(1) # Unreachable but satisfies type checker
def return_loan(session: requests.Session, book_id: str) -> None:
"""Return a borrowed book.
Args:
session: Authenticated requests.Session with active loan
book_id: Archive.org book identifier
"""
data = {"action": "return_loan", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 200 and response.json()["success"]:
debug("Book returned")
else:
display_error(response, "Something went wrong when trying to return the book")
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract book information and page links from archive.org viewer.
Args:
session: Authenticated requests.Session
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
Returns:
Tuple of (title, page_links, metadata)
Raises:
RuntimeError: If page data cannot be extracted
"""
r = session.get(url, timeout=30).text
# Try to extract the infos URL from the response
try:
# Look for the "url" field in the response
if '"url":"' not in r:
raise ValueError("No 'url' field found in response")
infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
except (IndexError, ValueError) as e:
# If URL extraction fails, raise with better error message
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
response = session.get(infos_url, timeout=30)
data = response.json()["data"]
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
title = title[:150] # Trim to avoid long file names
metadata = data["metadata"]
links = []
# Safely extract page links from brOptions data
try:
br_data = data.get("brOptions", {}).get("data", [])
for item in br_data:
if isinstance(item, list):
for page in item:
if isinstance(page, dict) and "uri" in page:
links.append(page["uri"])
elif isinstance(item, dict) and "uri" in item:
links.append(item["uri"])
except (KeyError, IndexError, TypeError) as e:
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
# Continue with whatever links we found
if len(links) > 1:
debug(f"Found {len(links)} pages")
return title, links, metadata
elif len(links) == 1:
debug(f"Found {len(links)} page")
return title, links, metadata
else:
log("Error while getting image links - no pages found", file=sys.stderr)
raise RuntimeError("No pages found in book data")
def image_name(pages: int, page: int, directory: str) -> str:
"""Generate image filename for page.
Args:
pages: Total number of pages
page: Current page number (0-indexed)
directory: Directory to save to
Returns:
Full path to image file
"""
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
"""Decrypt obfuscated image data using AES-CTR.
This handles Archive.org's image obfuscation for borrowed books.
Based on: https://github.com/justimm
Args:
image_data: Encrypted image bytes
link: Image URL (used to derive AES key)
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
Returns:
Decrypted image bytes
"""
if not AES or not Counter:
raise RuntimeError("Crypto library not available")
try:
version, counter_b64 = obf_header.split("|")
except Exception as e:
raise ValueError("Invalid X-Obfuscate header format") from e
if version != "1":
raise ValueError("Unsupported obfuscation version: " + version)
# Derive AES key from URL
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
key = sha1_digest[:16]
# Decode counter
counter_bytes = base64.b64decode(counter_b64)
if len(counter_bytes) != 16:
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
prefix = counter_bytes[:8]
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
# Create AES-CTR cipher
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
decrypted_part = cipher.decrypt(image_data[:1024])
new_data = decrypted_part + image_data[1024:]
return new_data
def download_one_image(
session: requests.Session,
link: str,
i: int,
directory: str,
book_id: str,
pages: int,
) -> None:
"""Download a single book page image.
Handles obfuscated images and re-borrowing on 403 errors.
Args:
session: Authenticated requests.Session
link: Direct image URL
i: Page index (0-based)
directory: Directory to save to
book_id: Archive.org book ID (for re-borrowing on 403)
pages: Total number of pages
"""
headers = {
"Referer": "https://archive.org/",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
"Sec-Fetch-Site": "same-site",
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Dest": "image",
}
retry = True
response = None
while retry:
try:
response = session.get(link, headers=headers, timeout=30)
if response.status_code == 403:
session = loan(session, book_id, verbose=False)
raise Exception("Borrow again")
if response.status_code == 200:
retry = False
except Exception:
time.sleep(1)
image = image_name(pages, i, directory)
if response is None:
log(f"Failed to download page {i}", file=sys.stderr)
return
obf_header = response.headers.get("X-Obfuscate")
image_content = None
if obf_header:
try:
image_content = deobfuscate_image(response.content, link, obf_header)
except Exception as e:
log(f"Deobfuscation failed: {e}", file=sys.stderr)
return
else:
image_content = response.content
with open(image, "wb") as f:
f.write(image_content)
def download(
session: requests.Session,
n_threads: int,
directory: str,
links: List[str],
scale: int,
book_id: str,
) -> List[str]:
"""Download all book pages as images.
Uses thread pool for parallel downloads.
Args:
session: Authenticated requests.Session
n_threads: Number of download threads
directory: Directory to save images to
links: List of image URLs
scale: Image resolution (0=highest, 10=lowest)
book_id: Archive.org book ID (for re-borrowing)
Returns:
List of downloaded image file paths
"""
debug("Downloading pages...")
links = [f"{link}&rotate=0&scale={scale}" for link in links]
pages = len(links)
tasks = []
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
for i, link in enumerate(links):
tasks.append(
executor.submit(
download_one_image,
session=session,
link=link,
i=i,
directory=directory,
book_id=book_id,
pages=pages,
)
)
if tqdm:
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
pass
else:
for _ in futures.as_completed(tasks):
pass
images = [image_name(pages, i, directory) for i in range(len(links))]
return images
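# Hedged sketch of the borrowing workflow built from the helpers above: login,
# borrow, collect page links, download page images, then return the loan.
# Credentials, book id, and the output directory are caller-supplied placeholders.
def _example_borrow_flow(email: str, password: str, book_id: str, out_dir: str):
    session = login(email, password)
    session = loan(session, book_id)
    try:
        title, links, metadata = get_book_infos(session, f"https://archive.org/details/{book_id}")
        os.makedirs(out_dir, exist_ok=True)
        # scale=3 is a mid resolution; per the docstring above, 0 is highest.
        images = download(session, n_threads=4, directory=out_dir, links=links, scale=3, book_id=book_id)
        return title, images, metadata
    finally:
        return_loan(session, book_id)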
def check_direct_download(book_id: str) -> Tuple[bool, str]:
"""Check if a book can be downloaded directly without borrowing.
Searches Archive.org metadata for downloadable PDF files.
Args:
book_id: Archive.org book identifier
Returns:
Tuple of (can_download: bool, pdf_url: str)
"""
try:
# First, try to get the metadata to find the actual PDF filename
metadata_url = f"https://archive.org/metadata/{book_id}"
response = requests.get(metadata_url, timeout=10)
response.raise_for_status()
metadata = response.json()
# Find PDF file in files list
if "files" in metadata:
for file_info in metadata["files"]:
filename = file_info.get("name", "")
if filename.endswith(".pdf") and file_info.get("source") == "original":
# Found the original PDF
pdf_filename = filename
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
# Verify it's accessible
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
return False, ""
except Exception as e:
log(f"Error checking direct download: {e}", file=sys.stderr)
return False, ""
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
"""Fetch book data from OpenLibrary using ISBN.
Args:
isbn: ISBN-10 or ISBN-13 to search for
Returns:
Dictionary with book metadata from OpenLibrary
"""
try:
# Try ISBN API first
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
response = requests.get(api_url, timeout=10)
response.raise_for_status()
data = response.json()
if data:
# Get first result
key = list(data.keys())[0]
return data[key]
return {}
except Exception as e:
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
return {}
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
"""Extract ISBN from archive.org metadata.
Looks for ISBN in various metadata fields.
Args:
metadata: Archive.org metadata dictionary
Returns:
ISBN string (clean, no hyphens) or empty string if not found
"""
# Try various common metadata fields
isbn_fields = [
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
"isbn-10", "isbn-13", "identifer_isbn"
]
for field in isbn_fields:
if field in metadata:
isbn_val = metadata[field]
if isinstance(isbn_val, list):
isbn_val = isbn_val[0] if isbn_val else None
if isbn_val and isinstance(isbn_val, str):
# Clean ISBN (remove hyphens, spaces)
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
if len(isbn_clean) in [10, 13]:
return isbn_clean
return ""
def normalize_url(url: str) -> str:
"""Convert openlibrary.org URL to archive.org URL.
Looks up the actual Archive.org ID from OpenLibrary API.
Args:
url: Book URL (archive.org or openlibrary.org format)
Returns:
Normalized archive.org URL
"""
url = url.strip()
# Already archive.org format
if url.startswith("https://archive.org/details/"):
return url
# Convert openlibrary.org format by querying the OpenLibrary API
if "openlibrary.org/books/" in url:
try:
# Extract the book ID (e.g., OL6796852M)
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
# Query OpenLibrary API to get the book metadata
api_url = f"https://openlibrary.org/books/{book_id}.json"
response = requests.get(api_url, timeout=10)
response.raise_for_status()
data = response.json()
# Look for identifiers including internet_archive or ocaid
# First try ocaid (Open Content Alliance ID) - this is most common
if "ocaid" in data:
ocaid = data["ocaid"]
return f"https://archive.org/details/{ocaid}"
# Check for identifiers object
if "identifiers" in data:
identifiers = data["identifiers"]
# Look for internet_archive ID
if "internet_archive" in identifiers:
ia_ids = identifiers["internet_archive"]
if isinstance(ia_ids, list) and ia_ids:
ia_id = ia_ids[0]
else:
ia_id = ia_ids
return f"https://archive.org/details/{ia_id}"
# If no IA identifier found, use the book ID as fallback
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
return f"https://archive.org/details/{book_id}"
except requests.RequestException as e:
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
# Fallback to using the book ID directly
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
return f"https://archive.org/details/{book_id}"
except (KeyError, IndexError) as e:
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
# Fallback to using the book ID directly
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
return f"https://archive.org/details/{book_id}"
# Return original if can't parse
return url
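# Hedged sketch: normalize an OpenLibrary URL to archive.org, then check for a
# directly downloadable PDF, using only helpers defined in this module. The
# last path segment is assumed to be the archive.org identifier.
def _example_direct_download(url: str):
    archive_url = normalize_url(url)
    book_id = archive_url.rsplit("/", 1)[-1]
    can_download, pdf_url = check_direct_download(book_id)
    return can_download, pdf_url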

730
helper/download.py Normal file
View File

@@ -0,0 +1,730 @@
"""Download media files using yt-dlp with support for direct file downloads.
Lean, focused downloader without event infrastructure overhead.
- yt-dlp integration for streaming sites
- Direct file download fallback for PDFs, images, documents
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via helper.logger.log()
"""
from __future__ import annotations
import re # noqa: F401
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin
import httpx
from helper.logger import log, debug
from .utils import ensure_directory, sha256_file
from .http_client import HTTPClient
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
try:
import yt_dlp # type: ignore
from yt_dlp.extractor import gen_extractors # type: ignore
except Exception as exc:
yt_dlp = None # type: ignore
YTDLP_IMPORT_ERROR = exc
else:
YTDLP_IMPORT_ERROR = None
try:
from metadata import extract_ytdlp_tags
except ImportError:
extract_ytdlp_tags = None
_EXTRACTOR_CACHE: List[Any] | None = None
def _ensure_yt_dlp_ready() -> None:
"""Verify yt-dlp is available, raise if not."""
if yt_dlp is not None:
return
detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
raise DownloadError(f"yt-dlp module not available: {detail}")
def _progress_callback(status: Dict[str, Any]) -> None:
"""Simple progress callback using logger."""
event = status.get("status")
if event == "downloading":
percent = status.get("_percent_str", "?")
speed = status.get("_speed_str", "?")
debug(f"Downloading {percent} at {speed}")
elif event == "finished":
debug(f"✓ Download finished: {status.get('filename')}")
elif event in ("postprocessing", "processing"):
debug(f"Post-processing: {status.get('postprocessor')}")
def is_url_supported_by_ytdlp(url: str) -> bool:
"""Check if URL is supported by yt-dlp."""
if yt_dlp is None:
return False
global _EXTRACTOR_CACHE
if _EXTRACTOR_CACHE is None:
try:
_EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type]
except Exception:
_EXTRACTOR_CACHE = []
for extractor in _EXTRACTOR_CACHE:
try:
if not extractor.suitable(url):
continue
except Exception:
continue
name = getattr(extractor, "IE_NAME", "")
if name.lower() == "generic":
continue
return True
return False
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
"""Get list of available formats for a URL using yt-dlp.
Args:
url: URL to get formats for
no_playlist: If True, ignore playlists and list formats for single video
playlist_items: If specified, only list formats for these playlist items (e.g., "1,3,5-8")
Returns:
List of format dictionaries with keys: format_id, format, resolution, fps, vcodec, acodec, filesize, etc.
        Returns None if format listing fails. Raises DownloadError if yt-dlp is not available.
"""
_ensure_yt_dlp_ready()
try:
ydl_opts = {
"quiet": False,
"no_warnings": False,
"socket_timeout": 30,
}
# Add no_playlist option if specified
if no_playlist:
ydl_opts["noplaylist"] = True
# Add playlist_items filter if specified
if playlist_items:
ydl_opts["playlist_items"] = playlist_items
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
debug(f"Fetching format list for: {url}")
info = ydl.extract_info(url, download=False)
formats = info.get("formats", [])
if not formats:
log("No formats available", file=sys.stderr)
return None
# Parse and extract relevant format info
result_formats = []
for fmt in formats:
format_info = {
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"), # Total bitrate
}
result_formats.append(format_info)
debug(f"Found {len(result_formats)} available formats")
return result_formats
except Exception as e:
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
return None
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
"""Build yt-dlp download options."""
ensure_directory(opts.output_dir)
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
base_options: Dict[str, Any] = {
"outtmpl": outtmpl,
"quiet": False,
"no_warnings": False,
"noprogress": False,
"socket_timeout": 30,
"retries": 10,
"fragment_retries": 10,
"http_chunk_size": 10_485_760,
"restrictfilenames": True,
"progress_hooks": [_progress_callback],
}
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
# Add no-playlist option if specified (for single video from playlist URLs)
if opts.no_playlist:
base_options["noplaylist"] = True
# Configure based on mode
if opts.mode == "audio":
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
else: # video
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = [
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
]
# Add clip sections if provided
if opts.clip_sections:
base_options["download_sections"] = opts.clip_sections
# Add playlist items selection if provided
if opts.playlist_items:
base_options["playlist_items"] = opts.playlist_items
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
return base_options
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
"""Iterate through download entries, handling playlists."""
queue: List[Dict[str, Any]] = [info]
seen: set[int] = set()
while queue:
current = queue.pop(0)
obj_id = id(current)
if obj_id in seen:
continue
seen.add(obj_id)
entries = current.get("entries")
if isinstance(entries, list):
for entry in entries:
if isinstance(entry, dict):
queue.append(entry)
if current.get("requested_downloads") or not entries:
yield current
def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
"""Get candidate file paths for downloaded media."""
requested = entry.get("requested_downloads")
if isinstance(requested, list):
for item in requested:
if isinstance(item, dict):
for key in ("filepath", "_filename", "filename"):
value = item.get(key)
if value:
yield Path(value)
for key in ("filepath", "_filename", "filename"):
value = entry.get(key)
if value:
yield Path(value)
if entry.get("filename"):
yield output_dir / entry["filename"]
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
"""Find downloaded file in yt-dlp metadata."""
for entry in _iter_download_entries(info):
for candidate in _candidate_paths(entry, output_dir):
if candidate.is_file():
return entry, candidate
if not candidate.is_absolute():
resolved = output_dir / candidate
if resolved.is_file():
return entry, resolved
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
"""Extract SHA256 hash from yt-dlp metadata."""
    for payload in [info] + list(info.get("entries") or []):
if not isinstance(payload, dict):
continue
hashes = payload.get("hashes")
if isinstance(hashes, dict):
for key in ("sha256", "sha-256", "sha_256"):
value = hashes.get(key)
if isinstance(value, str) and value.strip():
return value.strip().lower()
for key in ("sha256", "sha-256", "sha_256"):
value = payload.get(key)
if isinstance(value, str) and value.strip():
return value.strip().lower()
return None
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
"""Extract the actual download link from LibGen redirect URL.
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to
actual mirror URLs. This follows the redirect chain to get the real file.
Args:
libgen_url: LibGen file.php URL
Returns:
Actual download URL or None if extraction fails
"""
try:
import requests
from urllib.parse import urlparse
# Check if this is a LibGen URL
parsed = urlparse(libgen_url)
if 'libgen' not in parsed.netloc.lower():
return None
if '/file.php' not in parsed.path.lower():
return None
# LibGen redirects to actual mirrors, follow redirects to get final URL
session = requests.Session()
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
debug(f"Following LibGen redirect chain for: {libgen_url}")
# First, get the page and look for direct download link
try:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
# Try to find actual download link in the page
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Look for download links - LibGen typically has forms with download buttons
# Look for all links and forms that might lead to download
for link in soup.find_all('a'):
href = link.get('href')
if href and isinstance(href, str):
# Look for direct file links or get.php redirects
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
download_url = href if href.startswith('http') else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except ImportError:
pass # BeautifulSoup not available
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
return final_url
except requests.RequestException as e:
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
# Try head request as fallback
try:
response = session.head(libgen_url, allow_redirects=True, timeout=10)
if response.url != libgen_url:
debug(f"LibGen HEAD resolved to: {response.url}")
return response.url
            except Exception:
pass
return None
except Exception as e:
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
return None
def _download_direct_file(
url: str,
output_dir: Path,
debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
ensure_directory(output_dir)
from urllib.parse import unquote, urlparse, parse_qs
import re
# Extract filename from URL
parsed_url = urlparse(url)
url_path = parsed_url.path
# Try to get filename from query parameters first (for LibGen and similar services)
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
filename = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ('filename', 'download', 'file', 'name'):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
break
# If not found in query params, extract from URL path
if not filename or not filename.strip():
filename = url_path.split("/")[-1] if url_path else ""
filename = unquote(filename)
# Remove query strings from filename if any
if "?" in filename:
filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request)
try:
with HTTPClient(timeout=10.0) as client:
            response = client.request("HEAD", url, follow_redirects=True)
content_disposition = response.headers.get("content-disposition", "")
if content_disposition:
# Extract filename from Content-Disposition header
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
if match:
extracted_name = match.group(1) or match.group(2)
if extracted_name:
filename = unquote(extracted_name)
debug(f"Filename from Content-Disposition: {filename}")
except Exception as e:
log(f"Could not get filename from headers: {e}", file=sys.stderr)
# Fallback if we still don't have a good filename
if not filename or "." not in filename:
filename = "downloaded_file.bin"
file_path = output_dir / filename
progress_bar = ProgressBar()
debug(f"Direct download: {filename}")
try:
start_time = time.time()
downloaded_bytes = [0]
total_bytes = [0]
last_progress_time = [start_time]
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
downloaded_bytes[0] = bytes_downloaded
total_bytes[0] = content_length
now = time.time()
if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
elapsed = now - start_time
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0
speed_str = progress_bar.format_bytes(speed) + "/s"
minutes, seconds = divmod(int(eta_seconds), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
progress_line = progress_bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=bytes_downloaded,
total=content_length,
speed_str=speed_str,
eta_str=eta_str,
)
debug(progress_line)
last_progress_time[0] = now
with HTTPClient(timeout=30.0) as client:
client.download(url, str(file_path), progress_callback=progress_callback)
elapsed = time.time() - start_time
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
# For direct file downloads, create minimal info dict without filename as title
# This prevents creating duplicate title: tags when filename gets auto-generated
# We'll add title back later only if we couldn't extract meaningful tags
info = {
"id": filename.rsplit(".", 1)[0],
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
"webpage_url": url,
}
hash_value = None
try:
hash_value = sha256_file(file_path)
except Exception:
pass
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
        # Only fall back to the filename as the title if no title: tag was extracted
        # This prevents duplicate title: tags when the filename could be mistaken for metadata
if not any(t.startswith('title:') for t in tags):
# Re-extract tags with filename as title only if needed
info['title'] = filename
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as e:
log(f"Error extracting tags with filename: {e}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"direct-file-downloaded",
{"url": url, "path": str(file_path), "hash": hash_value},
)
return DownloadMediaResult(
path=file_path,
info=info,
tags=tags,
source_url=url,
hash_value=hash_value,
)
except (httpx.HTTPError, httpx.RequestError) as exc:
log(f"Download error: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{"phase": "direct-file", "url": url, "error": str(exc)},
)
raise DownloadError(f"Failed to download {url}: {exc}") from exc
except Exception as exc:
log(f"Error downloading file: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{
"phase": "direct-file",
"url": url,
"error": str(exc),
"traceback": traceback.format_exc(),
},
)
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
url: URL to probe
no_playlist: If True, ignore playlists and probe only the single video
Returns:
Dict with keys: extractor, title, entries (if playlist), duration, etc.
Returns None if not supported by yt-dlp.
"""
if not is_url_supported_by_ytdlp(url):
return None
_ensure_yt_dlp_ready()
assert yt_dlp is not None
try:
# Extract info without downloading
# Use extract_flat='in_playlist' to get full metadata for playlist items
        ydl_opts = {
            "quiet": True,  # Suppress all output
            "no_warnings": True,
            "socket_timeout": 10,
            "retries": 3,
            "skip_download": True,  # Don't actually download
            "extract_flat": "in_playlist",  # Get playlist with metadata for each entry
            "noprogress": True,  # No progress bars
        }
# Add no_playlist option if specified
if no_playlist:
ydl_opts["noplaylist"] = True
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(url, download=False)
if not isinstance(info, dict):
return None
# Extract relevant fields
return {
"extractor": info.get("extractor", ""),
"title": info.get("title", ""),
"entries": info.get("entries", []), # Will be populated if playlist
"duration": info.get("duration"),
"uploader": info.get("uploader"),
"description": info.get("description"),
"url": url,
}
except Exception as exc:
log(f"Probe failed for {url}: {exc}")
return None
def download_media(
opts: DownloadOptions,
*,
debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
"""Download media from URL using yt-dlp or direct HTTP download.
Args:
opts: DownloadOptions with url, mode, output_dir, etc.
debug_logger: Optional debug logger for troubleshooting
Returns:
DownloadMediaResult with path, info, tags, hash
Raises:
DownloadError: If download fails
"""
# Handle LibGen URLs specially
# file.php redirects to mirrors, get.php is direct from modern API
if 'libgen' in opts.url.lower():
if '/get.php' in opts.url.lower():
# Modern API get.php links are direct downloads from mirrors (not file redirects)
log(f"Detected LibGen get.php URL, downloading directly...")
if debug_logger is not None:
debug_logger.write_record("libgen-direct", {"url": opts.url})
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
        elif '/file.php' in opts.url.lower():
            # Old-style file.php redirects to mirrors, so resolve it first
            log("Detected LibGen file.php URL, resolving to actual mirror...")
            original_url = opts.url
            actual_url = _get_libgen_download_url(original_url)
            if actual_url and actual_url != original_url:
                log(f"Resolved LibGen URL to mirror: {actual_url}")
                opts.url = actual_url
                # After resolution this is typically a direct mirror or file link
                # Skip yt-dlp here (it won't support these mirrors) and download directly
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolved", {"original": original_url, "resolved": actual_url})
                return _download_direct_file(opts.url, opts.output_dir, debug_logger)
            else:
                log("Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
                return _download_direct_file(opts.url, opts.output_dir, debug_logger)
# Try yt-dlp first if URL is supported
if not is_url_supported_by_ytdlp(opts.url):
log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
if debug_logger is not None:
debug_logger.write_record("direct-file-attempt", {"url": opts.url})
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
_ensure_yt_dlp_ready()
ytdl_options = _build_ytdlp_options(opts)
log(f"Starting yt-dlp download: {opts.url}")
if debug_logger is not None:
debug_logger.write_record("ytdlp-start", {"url": opts.url})
assert yt_dlp is not None
try:
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(opts.url, download=True)
except Exception as exc:
log(f"yt-dlp failed: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{
"phase": "yt-dlp",
"error": str(exc),
"traceback": traceback.format_exc(),
},
)
raise DownloadError("yt-dlp download failed") from exc
if not isinstance(info, dict):
log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
raise DownloadError("Unexpected yt-dlp response type")
info_dict: Dict[str, Any] = info
if debug_logger is not None:
debug_logger.write_record(
"ytdlp-info",
{
"keys": sorted(info_dict.keys()),
"is_playlist": bool(info_dict.get("entries")),
},
)
try:
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
except FileNotFoundError as exc:
log(f"Error: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{"phase": "resolve-path", "error": str(exc)},
)
raise DownloadError(str(exc)) from exc
if debug_logger is not None:
debug_logger.write_record(
"resolved-media",
{"path": str(media_path), "entry_keys": sorted(entry.keys())},
)
# Extract hash from metadata or compute
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
if not hash_value:
try:
hash_value = sha256_file(media_path)
except OSError as exc:
if debug_logger is not None:
debug_logger.write_record(
"hash-error",
{"path": str(media_path), "error": str(exc)},
)
# Extract tags using metadata.py
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(entry)
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
source_url = (
entry.get("webpage_url")
or entry.get("original_url")
or entry.get("url")
)
log(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
if debug_logger is not None:
debug_logger.write_record(
"downloaded",
{
"path": str(media_path),
"tag_count": len(tags),
"source_url": source_url,
"sha256": hash_value,
},
)
return DownloadMediaResult(
path=media_path,
info=entry,
tags=tags,
source_url=source_url,
hash_value=hash_value,
)
__all__ = [
"download_media",
"is_url_supported_by_ytdlp",
"DownloadError",
"DownloadOptions",
"DownloadMediaResult",
]
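As a rough usage sketch (assuming DownloadOptions accepts url, mode, and output_dir as keyword arguments; its full signature lives in models.py, which is not shown here):

from pathlib import Path
from helper.download import download_media
from models import DownloadOptions, DownloadError

def fetch(url: str) -> None:
    # Assumed keyword arguments; adjust to the actual DownloadOptions definition
    opts = DownloadOptions(url=url, mode="video", output_dir=Path("./downloads"))
    try:
        result = download_media(opts)
        print(f"Saved {result.path} with {len(result.tags)} tag(s)")
    except DownloadError as exc:
        print(f"Download failed: {exc}")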

180
helper/file_server.py Normal file
View File

@@ -0,0 +1,180 @@
"""Simple HTTP file server for serving files in web mode."""
import threading
import socket
import logging
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path
from typing import Optional
import mimetypes
import urllib.parse
logger = logging.getLogger(__name__)
# Global server instance
_file_server: Optional[HTTPServer] = None
_server_thread: Optional[threading.Thread] = None
_server_port: int = 8001
class FileServerHandler(SimpleHTTPRequestHandler):
"""HTTP request handler for file serving."""
def do_GET(self):
"""Handle GET requests."""
# Parse the path
parsed_path = urllib.parse.urlparse(self.path)
file_path = urllib.parse.unquote(parsed_path.path)
# Remove leading slash
if file_path.startswith('/'):
file_path = file_path[1:]
# Decode the file path (it's URL encoded)
try:
full_path = Path(file_path).resolve()
# Security check: ensure the path is within allowed directories
# For now, allow all paths (can be restricted later)
            if full_path.is_file():
# Serve the file
logger.debug(f"Serving file: {full_path}")
# Determine content type
content_type, _ = mimetypes.guess_type(str(full_path))
if content_type is None:
content_type = 'application/octet-stream'
try:
with open(full_path, 'rb') as f:
file_content = f.read()
self.send_response(200)
self.send_header('Content-type', content_type)
self.send_header('Content-Length', str(len(file_content)))
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
self.end_headers()
self.wfile.write(file_content)
logger.info(f"Successfully served file: {full_path.name}")
return
except Exception as e:
logger.error(f"Error serving file: {e}")
self.send_error(500, "Internal server error")
return
else:
logger.warning(f"File not found: {full_path}")
self.send_error(404, "File not found")
return
except Exception as e:
logger.error(f"Error handling request: {e}")
self.send_error(400, "Bad request")
def log_message(self, format, *args):
"""Override to use our logger instead of stderr."""
logger.debug(format % args)
def get_local_ip() -> Optional[str]:
"""Get the local IP address that's accessible from other devices."""
try:
# Connect to a remote server to determine local IP
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip = s.getsockname()[0]
s.close()
return ip
except Exception as e:
logger.warning(f"Failed to determine local IP: {e}")
return None
def start_file_server(port: int = 8001) -> Optional[str]:
"""Start the HTTP file server.
Args:
port: Port to serve on
Returns:
Server URL if successful, None otherwise
"""
global _file_server, _server_thread, _server_port
if _file_server is not None:
logger.debug(f"File server already running on port {_server_port}")
local_ip = get_local_ip()
if local_ip:
return f"http://{local_ip}:{_server_port}"
return None
try:
_server_port = port
# Create server
server_address = ('', port)
_file_server = HTTPServer(server_address, FileServerHandler)
# Start in daemon thread
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
_server_thread.start()
logger.info(f"File server started on port {port}")
# Get local IP
local_ip = get_local_ip()
if local_ip:
server_url = f"http://{local_ip}:{port}"
logger.info(f"File server accessible at: {server_url}")
return server_url
else:
logger.warning("Could not determine local IP")
return None
except Exception as e:
logger.error(f"Failed to start file server: {e}")
_file_server = None
_server_thread = None
return None
def stop_file_server():
"""Stop the HTTP file server."""
global _file_server, _server_thread
if _file_server is not None:
try:
_file_server.shutdown()
_file_server.server_close()
logger.info("File server stopped")
except Exception as e:
logger.error(f"Error stopping file server: {e}")
finally:
_file_server = None
_server_thread = None
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
"""Get the HTTP URL for a file.
Args:
file_path: Path to the file
server_url: Base server URL (gets determined if None)
Returns:
HTTP URL to the file, or None if server not running
"""
if not file_path.exists():
logger.warning(f"File does not exist: {file_path}")
return None
if server_url is None:
local_ip = get_local_ip()
if not local_ip:
logger.error("Cannot determine local IP for file URL")
return None
server_url = f"http://{local_ip}:{_server_port}"
# URL encode the file path
encoded_path = urllib.parse.quote(str(file_path.resolve()))
return f"{server_url}/{encoded_path}"

1039
helper/file_storage.py Normal file

File diff suppressed because it is too large Load Diff

579
helper/http_client.py Normal file
View File

@@ -0,0 +1,579 @@
"""
Unified HTTP client for downlow using httpx.
Provides synchronous and asynchronous HTTP operations with:
- Automatic retries on transient failures
- Configurable timeouts and headers
- Built-in progress tracking for downloads
- Request/response logging support
"""
import httpx
import asyncio
from typing import Optional, Dict, Any, Callable, BinaryIO
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
class HTTPClient:
"""Unified HTTP client with sync support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str, str]] = None,
):
"""
Initialize HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.Client] = None
def __enter__(self):
"""Context manager entry."""
self._client = httpx.Client(
timeout=self.timeout,
verify=self.verify_ssl,
headers=self._get_headers(),
)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
if self._client:
self._client.close()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
return headers
def get(
self,
url: str,
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make a GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a POST request.
Args:
url: Request URL
data: Form data
json: JSON data
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"POST",
url,
data=data,
json=json,
files=files,
headers=headers,
)
def put(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
content: Optional[Any] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a PUT request.
Args:
url: Request URL
data: Form data
json: JSON data
content: Raw content
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"PUT",
url,
data=data,
json=json,
content=content,
files=files,
headers=headers,
)
def delete(
self,
url: str,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a DELETE request.
Args:
url: Request URL
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"DELETE",
url,
headers=headers,
)
def request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make a generic HTTP request.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments
Returns:
httpx.Response object
"""
return self._request(method, url, **kwargs)
def download(
self,
url: str,
file_path: str,
chunk_size: int = 8192,
progress_callback: Optional[Callable[[int, int], None]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Path:
"""
Download a file from URL with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
for chunk in response.iter_bytes(chunk_size):
if chunk:
f.write(chunk)
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make an HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.Client.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = self._client.request(method, url, **kwargs)
response.raise_for_status()
return response
except httpx.TimeoutException as e:
last_exception = e
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
if attempt < self.retries - 1:
continue
except httpx.HTTPStatusError as e:
# Don't retry on 4xx errors
if 400 <= e.response.status_code < 500:
try:
response_text = e.response.text[:500]
                    except Exception:
response_text = "<unable to read response>"
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
raise
last_exception = e
try:
response_text = e.response.text[:200]
                except Exception:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
if attempt < self.retries - 1:
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
if attempt < self.retries - 1:
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str, str]] = None,
):
"""
Initialize async HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.AsyncClient] = None
async def __aenter__(self):
"""Async context manager entry."""
self._client = httpx.AsyncClient(
timeout=self.timeout,
verify=self.verify_ssl,
headers=self._get_headers(),
)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
if self._client:
await self._client.aclose()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
return headers
async def get(
self,
url: str,
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make an async GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return await self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
async def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make an async POST request.
Args:
url: Request URL
data: Form data
json: JSON data
headers: Additional headers
Returns:
httpx.Response object
"""
return await self._request(
"POST",
url,
data=data,
json=json,
headers=headers,
)
async def download(
self,
url: str,
file_path: str,
chunk_size: int = 8192,
progress_callback: Optional[Callable[[int, int], None]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Path:
"""
Download a file from URL asynchronously with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
async with self._request_stream("GET", url, headers=headers) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
async for chunk in response.aiter_bytes(chunk_size):
if chunk:
f.write(chunk)
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
async def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make an async HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.AsyncClient.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = await self._client.request(method, url, **kwargs)
response.raise_for_status()
return response
except httpx.TimeoutException as e:
last_exception = e
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5) # Brief delay before retry
continue
except httpx.HTTPStatusError as e:
# Don't retry on 4xx errors
if 400 <= e.response.status_code < 500:
try:
response_text = e.response.text[:500]
                    except Exception:
response_text = "<unable to read response>"
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
raise
last_exception = e
try:
response_text = e.response.text[:200]
                except Exception:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
# Convenience function for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
"""Quick GET request without context manager."""
with HTTPClient() as client:
return client.get(url, **kwargs)
def post(url: str, **kwargs) -> httpx.Response:
"""Quick POST request without context manager."""
with HTTPClient() as client:
return client.post(url, **kwargs)
def download(
url: str,
file_path: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
**kwargs
) -> Path:
"""Quick file download without context manager."""
with HTTPClient() as client:
return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
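A brief sketch of both clients (URLs and paths are placeholders):

import asyncio
from helper.http_client import HTTPClient, AsyncHTTPClient

# Synchronous client: must be used as a context manager; retries are automatic
with HTTPClient(timeout=15.0) as client:
    response = client.get("https://example.com/api")
    print(response.status_code)

# Asynchronous client: same interface behind an async context manager
async def main() -> None:
    async with AsyncHTTPClient() as client:
        await client.download(
            "https://example.com/file.bin",
            "out.bin",
            progress_callback=lambda done, total: None,
        )

asyncio.run(main())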

1553
helper/hydrus.py Normal file

File diff suppressed because it is too large Load Diff

377
helper/libgen_service.py Normal file
View File

@@ -0,0 +1,377 @@
"""Shared Library Genesis search and download helpers."""
from __future__ import annotations
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional
import logging
import requests
from urllib.parse import quote, urljoin
from libgen import search_sync, LibgenError
LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]
DEFAULT_TIMEOUT = 10.0
DEFAULT_LIMIT = 50
logging.getLogger(__name__).setLevel(logging.WARNING)
def _call(logger: LogFn, message: str) -> None:
if logger:
logger(message)
def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
"""Search Libgen without triggering ads.php requests."""
try:
from bs4 import BeautifulSoup
except ImportError: # pragma: no cover
logging.warning("BeautifulSoup not available; falling back to standard search")
return []
mirrors = [
"https://libgen.gl",
"https://libgen.vg",
"https://libgen.la",
"https://libgen.bz",
"https://libgen.gs",
]
session = session or requests.Session()
session.headers.setdefault(
"User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
)
for mirror in mirrors:
try:
search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
if response.status_code != 200:
continue
soup = BeautifulSoup(response.content, "html.parser")
table = soup.find("table", {"class": "catalog"})
if table is None:
for candidate in soup.find_all("table"):
rows = candidate.find_all("tr")
if len(rows) > 2:
table = candidate
break
if table is None:
logging.debug("[libgen_no_ads] No results table on %s", mirror)
continue
rows = table.find_all("tr")[1:]
results: List[Dict[str, Any]] = []
for row in rows:
try:
cells = row.find_all("td")
if len(cells) < 9:
continue
size_cell = cells[7]
file_link = size_cell.find("a")
mirror_link = ""
if file_link:
href = str(file_link.get("href", ""))
if href.startswith("/"):
mirror_link = mirror + href
elif href:
mirror_link = urljoin(mirror, href)
if not mirror_link:
title_link = cells[1].find("a") if len(cells) > 1 else None
if title_link:
href = str(title_link.get("href", ""))
if href.startswith("/"):
mirror_link = mirror + href
elif href:
mirror_link = urljoin(mirror, href)
if not mirror_link:
continue
results.append(
{
"id": "",
"mirror": mirror_link,
"cover": "",
"title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
"authors": [cells[2].get_text(strip=True)]
if len(cells) > 2
else ["Unknown"],
"publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
"year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
"pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
"language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
"size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
"extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
"isbn": "",
}
)
except Exception as exc: # pragma: no cover - defensive
logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
continue
if results:
logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
return results
except Exception as exc: # pragma: no cover - mirror issues
logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
continue
return []
def format_book_info(book: Any) -> Dict[str, Any]:
"""Format Libgen search result into a consistent dictionary."""
filesize_bytes = 0
size_str = getattr(book, "size", "") or ""
if size_str:
parts = size_str.strip().split()
try:
value = float(parts[0])
unit = parts[1].upper() if len(parts) > 1 else "B"
if unit in {"MB", "M"}:
filesize_bytes = int(value * 1024 * 1024)
elif unit in {"GB", "G"}:
filesize_bytes = int(value * 1024 * 1024 * 1024)
elif unit in {"KB", "K"}:
filesize_bytes = int(value * 1024)
else:
filesize_bytes = int(value)
except (ValueError, IndexError): # pragma: no cover - defensive
filesize_bytes = 0
title = getattr(book, "title", "") or ""
isbn = getattr(book, "isbn", "") or ""
if not isbn and title:
import re
match = re.search(
r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
title,
)
if match:
potential_isbn = match.group(0).strip()
if re.search(r"\d{10,13}", potential_isbn):
isbn = potential_isbn
title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)
authors_value = getattr(book, "authors", None)
if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
authors_str = ", ".join(str(author) for author in authors_value)
else:
authors_str = str(authors_value or "Unknown")
download_links = getattr(book, "download_links", None)
mirror_url = None
if download_links and getattr(download_links, "get_link", None):
mirror_url = download_links.get_link
return {
"title": title or "Unknown",
"author": authors_str,
"publisher": getattr(book, "publisher", "") or "",
"year": getattr(book, "year", "") or "",
"pages": getattr(book, "pages", "") or "",
"language": getattr(book, "language", "") or "",
"filesize": filesize_bytes,
"filesize_str": size_str or "Unknown",
"extension": getattr(book, "extension", "") or "",
"isbn": isbn,
"mirror_url": mirror_url,
}
def search_libgen(
query: str,
limit: int = DEFAULT_LIMIT,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
"""Search Libgen returning formatted dictionaries with multiple mirrors.
Uses HTML scraper (search_libgen_no_ads) to find books quickly.
Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
"""
try:
_call(log_info, f"[search] Searching Libgen for: {query}")
session = session or requests.Session()
# Use HTML scraper - more reliable and doesn't hang on mirror resolution
_call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
results: List[Any] = search_libgen_no_ads(query, session=session)
if not results:
_call(log_info, "[search] No results from HTML scraper")
return []
formatted: List[Dict[str, Any]] = []
mirrors_list = [
"https://libgen.gl",
"https://libgen.vg",
"https://libgen.la",
"https://libgen.bz",
"https://libgen.gs",
]
for book in results[:limit]:
if isinstance(book, dict):
# Result from search_libgen_no_ads (HTML scraper)
authors = book.get("authors", ["Unknown"])
if isinstance(authors, list):
author_value = ", ".join(str(a) for a in authors)
else:
author_value = str(authors)
# Extract book ID from mirror URL if available
mirror = book.get("mirror", "")
book_id = ""
if mirror and "/file.php?id=" in mirror:
try:
book_id = mirror.split("/file.php?id=")[1].split("&")[0]
except (IndexError, ValueError):
pass
# Build list of alternative mirrors based on book ID
mirrors_dict = {}
if book_id:
for mirror_base in mirrors_list:
mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
elif mirror:
# Fallback: use the mirror we found
mirrors_dict["primary"] = mirror
formatted.append(
{
"title": book.get("title", "Unknown"),
"author": author_value,
"publisher": book.get("publisher", ""),
"year": book.get("year", ""),
"pages": book.get("pages", ""),
"language": book.get("language", ""),
"filesize": 0,
"filesize_str": book.get("size", "Unknown"),
"extension": book.get("extension", ""),
"isbn": book.get("isbn", ""),
"mirror_url": mirror, # Primary mirror
"mirrors": mirrors_dict, # Alternative mirrors
"book_id": book_id,
}
)
else:
# Fallback: try to format as book object
try:
formatted.append(format_book_info(book))
except Exception:
pass
_call(log_info, f"[search] Found {len(formatted)} result(s)")
return formatted
except LibgenError as exc:
_call(log_error, f"[search] Libgen error: {exc}")
return []
except Exception as exc: # pragma: no cover - defensive
_call(log_error, f"[search] Error: {exc}")
return []
def download_from_mirror(
mirror_url: str,
output_path: str | Path,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> bool:
"""Download a Libgen file and write it to disk.
Handles Libgen redirects and ensures proper file download by:
- Following all redirects (default behavior)
- Setting User-Agent header (required by some mirrors)
- Validating that we're downloading binary content, not HTML
- Attempting alternative download method if HTML is returned
"""
session = session or requests.Session()
try:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
_call(log_info, f"[download] Downloading from mirror: {mirror_url}")
# Ensure session has proper headers for Libgen
if 'User-Agent' not in session.headers:
session.headers['User-Agent'] = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Download with redirects enabled (default) and referer
session.headers['Referer'] = 'https://libgen.gs/'
response = session.get(mirror_url, stream=True, timeout=30, allow_redirects=True)
response.raise_for_status()
# Check if we got HTML instead of a file (common Libgen issue)
content_type = response.headers.get('content-type', '').lower()
if 'text/html' in content_type:
_call(log_error, f"[download] Server returned HTML. Trying alternative method...")
# Try to extract file ID and use alternative CDN
try:
# Parse the HTML to extract MD5 or file ID
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')
# Look for download link in the HTML
# Common patterns: md5 hash in form, or direct link in anchor tags
download_link = None
# Try to find forms that might contain download functionality
forms = soup.find_all('form')
for form in forms:
action = form.get('action', '')
if 'download' in action.lower() or 'get' in action.lower():
download_link = action
break
                if not download_link:
                    _call(log_error, "[download] Could not extract alternative download link from HTML")
                    return False
                # Form actions are often relative, so resolve them against the mirror URL
                download_link = urljoin(mirror_url, download_link)
                _call(log_info, f"[download] Using alternative download method: {download_link[:100]}")
                # Try downloading from alternative link
                response2 = session.get(download_link, stream=True, timeout=30, allow_redirects=True)
                response2.raise_for_status()
                response = response2  # Use the new response
except Exception as alt_error:
_call(log_error, f"[download] Alternative method failed: {alt_error}")
return False
total_size = int(response.headers.get("content-length", 0))
downloaded = 0
with open(output_path, "wb") as handle:
for chunk in response.iter_content(chunk_size=8192):
if not chunk:
continue
handle.write(chunk)
downloaded += len(chunk)
if total_size > 0:
percent = downloaded / total_size * 100
_call(
log_info,
f"[download] {percent:.1f}% - {downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB",
)
_call(log_info, f"[download] Downloaded successfully to: {output_path}")
return True
except Exception as exc: # pragma: no cover - defensive
_call(log_error, f"[download] Error: {exc}")
return False
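An end-to-end sketch combining the two helpers above (the query and output path are placeholders):

from helper.libgen_service import search_libgen, download_from_mirror

results = search_libgen("python programming", limit=5, log_info=print, log_error=print)
if results:
    mirror = results[0].get("mirror_url")
    if mirror:
        ok = download_from_mirror(mirror, "downloads/book.pdf", log_info=print, log_error=print)
        print("downloaded" if ok else "failed")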

1395
helper/local_library.py Normal file

File diff suppressed because it is too large Load Diff

70
helper/logger.py Normal file
View File

@@ -0,0 +1,70 @@
"""Unified logging utility for automatic file and function name tracking."""
import sys
import inspect
from pathlib import Path
_DEBUG_ENABLED = False
def set_debug(enabled: bool) -> None:
"""Enable or disable debug logging."""
global _DEBUG_ENABLED
_DEBUG_ENABLED = enabled
def debug(*args, **kwargs) -> None:
"""Print debug message if debug logging is enabled.
Automatically prepends [filename.function_name] to all output.
"""
if not _DEBUG_ENABLED:
return
# Set default to stderr for debug messages
if 'file' not in kwargs:
kwargs['file'] = sys.stderr
# Prepend DEBUG label
args = ("DEBUG:", *args)
# Use the same logic as log()
log(*args, **kwargs)
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
Automatically prepends [filename.function_name] to all output.
Defaults to stdout if not specified.
Example:
log("Upload started") # Output: [add_file.run] Upload started
"""
    # Get the calling frame, skipping frames inside this module so that
    # debug() delegating to log() still reports the original caller
    frame = inspect.currentframe()
    if frame is None:
        print(*args, **kwargs)
        return
    caller_frame = frame.f_back
    while caller_frame is not None and caller_frame.f_code.co_filename == frame.f_code.co_filename:
        caller_frame = caller_frame.f_back
    if caller_frame is None:
        print(*args, **kwargs)
        return
try:
# Get file name without extension
file_name = Path(caller_frame.f_code.co_filename).stem
# Get function name
func_name = caller_frame.f_code.co_name
# Set default to stdout if not specified
if 'file' not in kwargs:
kwargs['file'] = sys.stdout
# Build prefix
prefix = f"[{file_name}.{func_name}]"
# Print with prefix
print(prefix, *args, **kwargs)
finally:
del frame
del caller_frame
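Usage is a drop-in replacement for print; assuming the snippet below lives in a file named example.py, the output would be prefixed with [example.upload]:

from helper.logger import log, debug, set_debug

set_debug(True)

def upload() -> None:
    log("Upload started")        # stdout: [example.upload] Upload started
    debug("chunk 1 of 4 sent")   # stderr: [example.upload] DEBUG: chunk 1 of 4 sent

upload()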

951
helper/mpv_file.py Normal file
View File

@@ -0,0 +1,951 @@
"""MPV file metadata aggregation helpers."""
from __future__ import annotations
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence
from urllib.parse import parse_qs, urlparse, unquote
from config import get_hydrus_url
from helper.utils import sha256_file, unique_preserve_order
from helper.hydrus import HydrusClient, HydrusRequestError
import metadata
class MPVFileError(RuntimeError):
"""Raised when we cannot construct an MPV file snapshot."""
@dataclass(slots=True)
class DebridMagnet:
"""Represents a magnet result from AllDebrid search.
This class matches the structure expected by the TUI (like Hydrus results)
with title, target, media_kind attributes for compatibility.
"""
magnet_id: str
title: str
size: int
status_code: int
status_text: str
progress: float
downloaded: int
seeders: int
dl_speed: int
tag_summary: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None # Complete magnet file metadata from AllDebrid API
@property
def target(self) -> str:
"""Return the target URI for this magnet (used by TUI for access operations)."""
return f"alldebrid://{self.magnet_id}"
@property
def media_kind(self) -> str:
"""Return media kind for display."""
return "magnet"
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for metadata display."""
return {
"magnet_id": self.magnet_id,
"title": self.title,
"size": self.size,
"status_code": self.status_code,
"status_text": self.status_text,
"progress": f"{self.progress:.1f}%",
"downloaded": self.downloaded,
"seeders": self.seeders,
"dl_speed": self.dl_speed,
}
@dataclass(slots=True)
class HydrusSettings:
base_url: Optional[str]
access_key: Optional[str]
timeout: float
prefer_service_name: Optional[str]
include_relationships: bool
def as_metadata_options(self) -> Dict[str, Any]:
options: Dict[str, Any] = {
"timeout": self.timeout,
"include_relationships": self.include_relationships,
}
if self.prefer_service_name:
options["prefer_service_name"] = self.prefer_service_name
return options
@dataclass(slots=True)
class MPVfile:
path: Optional[str] = None
filename: Optional[str] = None
type: str = "unknown"
hash: Optional[str] = None
local_path: Optional[str] = None
mpv_metadata: Dict[str, Any] = field(default_factory=dict)
metadata: Dict[str, Any] = field(default_factory=dict)
remote_metadata: Optional[Dict[str, Any]] = None
relationships: Optional[Dict[str, Any]] = None
relationship_metadata: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
original_tags: Dict[str, str] = field(default_factory=dict)
known_urls: List[str] = field(default_factory=list)
title: Optional[str] = None
source_url: Optional[str] = None
clip_time: Optional[str] = None
duration: Optional[float] = None
filesize_mb: Optional[float] = None
is_video: bool = False
is_audio: bool = False
is_deleted: Optional[bool] = None
is_local: Optional[bool] = None
has_current_file_service: Optional[bool] = None
tag_service_key: Optional[str] = None
swap_recommended: bool = False
warnings: List[str] = field(default_factory=list)
# New relationship fields for menu
king: Optional[str] = None
alts: List[str] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"path": self.path,
"filename": self.filename,
"type": self.type,
"hash": self.hash,
"local_path": self.local_path,
"mpv_metadata": self.mpv_metadata,
"metadata": self.metadata,
"remote_metadata": self.remote_metadata,
"relationships": self.relationships,
"relationship_metadata": self.relationship_metadata,
"tags": self.tags,
"original_tags": self.original_tags,
"known_urls": self.known_urls,
"title": self.title,
"source_url": self.source_url,
"clip_time": self.clip_time,
"duration": self.duration,
"filesize_mb": self.filesize_mb,
"is_video": self.is_video,
"is_audio": self.is_audio,
"is_deleted": self.is_deleted,
"is_local": self.is_local,
"has_current_file_service": self.has_current_file_service,
"tag_service_key": self.tag_service_key,
"swap_recommended": self.swap_recommended,
"warnings": self.warnings,
# relationship summary fields for easier Lua consumption
"king": self.king,
"alts": self.alts,
}
# Remove empty optional values for terser payloads.
for key in list(payload.keys()):
value = payload[key]
if value in (None, [], {}, ""):
del payload[key]
return payload
def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
if not values:
return []
seen: set[str] = set()
result: List[str] = []
for value in values:
if value is None:
continue
text = str(value).strip()
if not text or text in seen:
continue
seen.add(text)
result.append(text)
return result
def _looks_like_hash(value: Optional[str]) -> bool:
if not value:
return False
candidate = value.strip().lower()
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
class MPVFileBuilder:
def __init__(self, payload: Dict[str, Any], config: Dict[str, Any]):
self.payload = payload or {}
self.config = config or {}
self.state = MPVfile()
self.hydrus_settings = self._resolve_hydrus_settings()
self.remote_options = self._resolve_remote_options()
self.include_relationships = bool(self.payload.get("include_relationships", True))
self.last_url = self._normalise_url(self.payload.get("last_url"))
self._initialise_identity()
# ------------------------------------------------------------------
# public API
# ------------------------------------------------------------------
def build(self) -> Dict[str, Any]:
if self.state.type == "hydrus":
self._populate_hydrus_by_hash()
elif self.state.type == "local":
self._populate_local()
elif self.state.type == "remote":
self._populate_remote()
else:
# Attempt best effort resolution even for unknown types.
self._populate_local(best_effort=True)
self._finalise()
result = self.state.to_dict()
# Append King and Alts info to mpv_metadata for info menu
king = self.state.king
alts = self.state.alts
if king:
result.setdefault("mpv_metadata", {})["King"] = king
if alts:
result.setdefault("mpv_metadata", {})["Alts"] = ", ".join(alts)
return result
# ------------------------------------------------------------------
# configuration helpers
# ------------------------------------------------------------------
def _resolve_hydrus_settings(self) -> HydrusSettings:
overrides = self.payload.get("hydrus")
overrides = overrides if isinstance(overrides, dict) else {}
base_url = overrides.get("url") or overrides.get("base_url")
access_key = overrides.get("access_key")
timeout_raw = overrides.get("timeout") or overrides.get("hydrus_timeout")
prefer_service = overrides.get("prefer_service_name")
include_relationships = overrides.get("include_relationships")
if base_url is None:
base_url = get_hydrus_url(self.config)
if access_key is None:
raw_key = self.config.get("HydrusNetwork_Access_Key")
access_key = str(raw_key) if raw_key is not None else None
if timeout_raw is None:
timeout_raw = self.config.get("HydrusNetwork_Request_Timeout")
try:
timeout = float(timeout_raw) if timeout_raw is not None else 60.0
except (TypeError, ValueError):
timeout = 60.0
if prefer_service is None:
prefer_service = self.config.get("Hydrus_Tag_Service")
if isinstance(prefer_service, str):
prefer_service = prefer_service.strip() or None
if include_relationships is None:
include_relationships = self.payload.get("include_relationships")
include_relationships = True if include_relationships is None else bool(include_relationships)
base_url = base_url.strip() if isinstance(base_url, str) else None
access_key = access_key.strip() if isinstance(access_key, str) else None
return HydrusSettings(
base_url=base_url or None,
access_key=access_key or None,
timeout=timeout,
prefer_service_name=prefer_service,
include_relationships=include_relationships,
)
def _resolve_remote_options(self) -> Dict[str, Any]:
remote_payload = self.payload.get("remote")
remote_payload = remote_payload if isinstance(remote_payload, dict) else {}
options = remote_payload.get("options")
options = options if isinstance(options, dict) else {}
ytdlp_args = options.get("ytdlp_args")
if not ytdlp_args:
options["ytdlp_args"] = ["--no-playlist", "--skip-download", "--no-warnings"]
existing_timeout = options.get("timeout")
if existing_timeout is None:
options["timeout"] = min(90.0, max(10.0, float(self.payload.get("remote_timeout") or 45.0)))
return options
# ------------------------------------------------------------------
# initialisation
# ------------------------------------------------------------------
def _initialise_identity(self) -> None:
s = self.state
p = self.payload
def _str_or_none(v):
return str(v) if v is not None and v != "" else None
def _copy_dict_if_dict(v):
return dict(v) if isinstance(v, dict) else {}
# path and filename
s.path = _str_or_none(p.get("path"))
s.filename = _str_or_none(p.get("filename"))
# mpv metadata
s.mpv_metadata = _copy_dict_if_dict(p.get("mpv_metadata"))
# tags (support both "tags" and legacy "existing_tags")
existing_tags = p.get("tags") or p.get("existing_tags")
s.tags = _normalise_string_list(existing_tags)
if s.tags:
s.original_tags = {tag: tag for tag in s.tags}
# known URLs + last_url
s.known_urls = _normalise_string_list(p.get("known_urls"))
if self.last_url and self.last_url not in s.known_urls:
s.known_urls.append(self.last_url)
# source URL (explicit or fallback to last_url)
explicit_source = p.get("source_url")
s.source_url = self._normalise_url(explicit_source) or self.last_url
# hash (validate looks-like-hash)
hash_candidate = p.get("hash")
if isinstance(hash_candidate, str):
candidate = hash_candidate.strip().lower()
if _looks_like_hash(candidate):
s.hash = candidate
# local_path (non-empty string)
local_path_override = p.get("local_path")
if isinstance(local_path_override, str):
lp = local_path_override.strip()
if lp:
s.local_path = lp
# derive remaining fields from path/filename/type
self._derive_filename_from_path()
self._determine_type()
def _derive_filename_from_path(self) -> None:
if self.state.filename or not self.state.path:
return
parsed = urlparse(self.state.path)
if parsed.scheme in ("http", "https", "ytdl") and parsed.path:
candidate = Path(parsed.path).name
if candidate:
self.state.filename = candidate
elif parsed.scheme == "file":
decoded = self._decode_file_url(self.state.path)
if decoded:
self.state.filename = Path(decoded).name
else:
try:
self.state.filename = Path(self.state.path).name
except Exception:
pass
def _determine_type(self) -> None:
s = self.state
p = self.payload
def _set_local_from_path(pth: str | None):
if not pth:
return
# Prefer resolved local path when available
resolved = self._resolve_local_path(pth)
s.local_path = resolved if resolved else pth
s.type = "local"
# 1) Respect explicit type when valid
explicit = p.get("type")
if isinstance(explicit, str):
lowered = explicit.strip().lower()
if lowered in {"local", "hydrus", "remote"}:
s.type = lowered
if lowered == "local":
s.local_path = self._resolve_local_path(s.path)
return
# 2) Work from path
path = s.path or ""
if not path:
s.type = "unknown"
return
# 3) Hydrus-specific quick checks
if self._looks_like_hydrus_url(path):
s.type = "hydrus"
return
parsed = urlparse(path)
scheme = (parsed.scheme or "").lower()
# 4) scheme-based handling
if scheme == "hydrus":
s.type = "hydrus"
return
if scheme in {"http", "https", "rtmp", "rtsp", "magnet", "ytdl"}:
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
return
if scheme == "file":
decoded = self._decode_file_url(path)
if decoded:
s.local_path = decoded
s.type = "local"
return
# 5) Windows/UNC absolute paths
if re.match(r"^[A-Za-z]:[\\/]", path) or path.startswith(("\\\\", "//")):
s.type = "local"
s.local_path = path
return
# 6) Fallback: if it looks like a URL with a scheme separator treat as remote/hydrus
if "://" in path:
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
return
# 7) Otherwise treat as a local path
_set_local_from_path(path)
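# Illustrative examples of the classification above (a sketch of assumed behaviour,
# not an exhaustive spec):
#   "C:\\media\\clip.mp4"                   -> type="local",  local_path="C:\\media\\clip.mp4"
#   "file:///home/user/clip.mp4"            -> type="local",  local_path decoded from the file URL
#   "https://example.com/video.mp4"         -> type="remote"
#   "<hydrus base>/get_files/file?hash=..." -> type="hydrus" (only when it matches the configured base URL)
#   ""                                      -> type="unknown"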
# ------------------------------------------------------------------
# population helpers
# ------------------------------------------------------------------
def _populate_local(self, best_effort: bool = False) -> None:
local_path = self.state.local_path or self._resolve_local_path(self.state.path)
if local_path:
self.state.local_path = local_path
self._load_sidecar_tags(local_path)
if not self.state.hash:
self._compute_local_hash(local_path)
# If Hydrus is configured and we have a hash, enrich from Hydrus; otherwise keep local tags only
if self.state.hash and self.hydrus_settings.base_url and self.hydrus_settings.access_key:
self._populate_hydrus_by_hash()
elif best_effort and self.hydrus_settings.base_url and self.state.source_url and self.hydrus_settings.access_key:
self._populate_hydrus_by_url(self.state.source_url)
# (helpers for resolving local path and loading sidecars already exist below)
def _populate_remote(self) -> None:
source_url = self.state.source_url or self.last_url or self.state.path
source_url = self._normalise_url(source_url)
if source_url:
self.state.source_url = source_url
remote_payload = {
"source_url": self.state.source_url,
"existing_tags": self.state.tags,
"metadata": self.payload.get("remote_metadata"),
"mpv_metadata": self.state.mpv_metadata,
"options": self.remote_options,
}
try:
remote_result = metadata.resolve_remote_metadata(remote_payload)
except Exception as exc: # pragma: no cover - surfaced to the caller
self.state.warnings.append(str(exc))
remote_result = None
if remote_result:
tags = remote_result.get("tags") or []
self._merge_tags(tags)
self.state.remote_metadata = remote_result.get("metadata")
self.state.title = remote_result.get("title") or self.state.title
self.state.duration = remote_result.get("duration") or self.state.duration
self.state.source_url = remote_result.get("source_url") or self.state.source_url
warnings = remote_result.get("warnings") or []
if warnings:
self.state.warnings.extend(warnings)
if self.hydrus_settings.base_url and self.state.source_url:
self._populate_hydrus_by_url(self.state.source_url)
def _populate_hydrus_by_hash(self) -> None:
hash_hex = self.state.hash or self._extract_hash_from_path(self.state.path)
if hash_hex and not _looks_like_hash(hash_hex):
hash_hex = None
if not hash_hex:
return
self.state.hash = hash_hex
if not self.hydrus_settings.base_url:
return
payload: Dict[str, Any] = {
"api_url": self.hydrus_settings.base_url,
"access_key": self.hydrus_settings.access_key or "",
"options": self.hydrus_settings.as_metadata_options(),
"hash": hash_hex,
}
try:
result = metadata.fetch_hydrus_metadata(payload)
except Exception as exc: # pragma: no cover - surfaced to caller
self.state.warnings.append(str(exc))
return
self._apply_hydrus_result(result)
# Enrich relationships using the dedicated Hydrus endpoint (robust GET)
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
self._enrich_relationships_from_api(self.state.hash)
def _populate_hydrus_by_url(self, url: str) -> None:
if not self.hydrus_settings.base_url:
return
payload: Dict[str, Any] = {
"api_url": self.hydrus_settings.base_url,
"access_key": self.hydrus_settings.access_key or "",
"options": self.hydrus_settings.as_metadata_options(),
"url": url,
}
try:
result = metadata.fetch_hydrus_metadata_by_url(payload)
except Exception as exc: # pragma: no cover - surfaced to caller
self.state.warnings.append(str(exc))
return
if result.get("error") == "not_found":
self.state.warnings.extend(result.get("warnings") or [])
return
self._apply_hydrus_result(result)
self.state.type = "hydrus"
matched_url = result.get("matched_url") or result.get("url")
if matched_url and matched_url not in self.state.known_urls:
self.state.known_urls.append(matched_url)
# Enrich relationships once we know the hash
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
self._enrich_relationships_from_api(self.state.hash)
# ------------------------------------------------------------------
# state modification helpers
# ------------------------------------------------------------------
def _apply_hydrus_result(self, result: Dict[str, Any]) -> None:
metadata_payload = result.get("metadata")
if isinstance(metadata_payload, dict):
# Process mime into type for Lua
mime = metadata_payload.get("mime")
if isinstance(mime, str):
if mime.startswith("video/"):
metadata_payload["type"] = "video"
elif mime.startswith("audio/"):
metadata_payload["type"] = "audio"
elif mime.startswith("image/"):
metadata_payload["type"] = "image"
else:
metadata_payload["type"] = "other"
self.state.metadata = metadata_payload
# Do NOT overwrite MPVfile.type with metadata.type
self._merge_known_urls(metadata_payload.get("known_urls") or metadata_payload.get("known_urls_set"))
source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url")
if source_url and not self.state.source_url:
self.state.source_url = self._normalise_url(source_url)
# If file_relationships are embedded in metadata, capture as relationships when missing
if self.state.relationships is None:
embedded = metadata_payload.get("file_relationships")
if isinstance(embedded, dict) and embedded:
self.state.relationships = embedded
tags = result.get("tags") or []
self._merge_tags(tags)
hash_value = result.get("hash") or result.get("matched_hash")
if isinstance(hash_value, str) and _looks_like_hash(hash_value):
self.state.hash = hash_value.lower()
self.state.tag_service_key = result.get("tag_service_key") or self.state.tag_service_key
self.state.duration = result.get("duration") or self.state.duration
self.state.filesize_mb = result.get("filesize_mb") or self.state.filesize_mb
self.state.is_video = bool(result.get("is_video") or self.state.is_video)
self.state.is_audio = bool(result.get("is_audio") or self.state.is_audio)
if result.get("is_deleted") is not None:
self.state.is_deleted = bool(result.get("is_deleted"))
if result.get("is_local") is not None:
self.state.is_local = bool(result.get("is_local"))
if result.get("has_current_file_service") is not None:
self.state.has_current_file_service = bool(result.get("has_current_file_service"))
# Consolidate relationships from explicit result or embedded metadata
relationships_obj: Optional[Dict[str, Any]] = None
if isinstance(result.get("relationships"), dict):
relationships_obj = result["relationships"]
self.state.relationships = relationships_obj
elif isinstance(self.state.relationships, dict):
relationships_obj = self.state.relationships
# Helper to flatten any hashes from the relationships object
def _collect_hashes(obj: Any, acc: set[str]) -> None:
if obj is None:
return
if isinstance(obj, dict):
for v in obj.values():
_collect_hashes(v, acc)
elif isinstance(obj, (list, tuple, set)):
for v in obj:
_collect_hashes(v, acc)
elif isinstance(obj, str) and _looks_like_hash(obj):
acc.add(obj.lower())
# Derive king and alts robustly from available data
king: Optional[str] = None
alts: list[str] = []
# 1) Try direct king fields on relationships object
rels = relationships_obj or {}
if isinstance(rels, dict):
# Common variants
for key in ("king", "king_hash", "duplicate_king", "best", "best_hash"):
val = rels.get(key)
if isinstance(val, str) and _looks_like_hash(val):
king = val.lower()
break
if isinstance(val, list):
for h in val:
if isinstance(h, str) and _looks_like_hash(h):
king = h.lower()
break
if king:
break
# 2) Extract alternates from known fields: numeric "3" (clips), or textual synonyms
for alt_key in ("3", "alternates", "alts", "clips"):
val = rels.get(alt_key)
if isinstance(val, list):
for h in val:
if isinstance(h, str) and _looks_like_hash(h):
h_low = h.lower()
if not king or h_low != king:
alts.append(h_low)
# some APIs might nest
elif isinstance(val, dict):
tmp: set[str] = set()
_collect_hashes(val, tmp)
for h in sorted(tmp):
if not king or h != king:
alts.append(h)
# 3) Use relationship_metadata keys as additional alternates and king hint
rel_meta = result.get("relationship_metadata")
if isinstance(rel_meta, dict):
# prefer king candidate with no clip_time if not set
if not king:
for h, meta in rel_meta.items():
if isinstance(h, str) and _looks_like_hash(h) and isinstance(meta, dict):
if not meta.get("clip_time"):
king = h.lower()
break
for h in rel_meta.keys():
if isinstance(h, str) and _looks_like_hash(h):
h_low = h.lower()
if not king or h_low != king:
alts.append(h_low)
# 4) As a last resort, flatten all relationship hashes
if not alts and relationships_obj:
tmp: set[str] = set()
_collect_hashes(relationships_obj, tmp)
for h in sorted(tmp):
if not king or h != king:
alts.append(h)
# 5) Include current file when appropriate
if self.state.hash and (not king or self.state.hash != king) and self.state.hash not in alts:
alts.append(self.state.hash)
# 6) Sort alternates by clip start time when available
rel_meta_all = result.get("relationship_metadata") if isinstance(result.get("relationship_metadata"), dict) else {}
def _clip_start_for(h: str) -> float:
meta = rel_meta_all.get(h) if isinstance(rel_meta_all, dict) else None
clip = meta.get("clip_time") if isinstance(meta, dict) else None
if isinstance(clip, str):
m = re.match(r"^(\d+)-(\d+)$", clip)
if m:
try:
return float(m.group(1))
except Exception:
return float("inf")
return float("inf")
if alts:
# de-duplicate while preserving earliest clip time ordering
seen: set[str] = set()
alts = [h for h in sorted(alts, key=_clip_start_for) if (h not in seen and not seen.add(h))]
self.state.king = king
self.state.alts = alts
if isinstance(result.get("relationship_metadata"), dict):
self.state.relationship_metadata = result["relationship_metadata"]
self.state.title = result.get("title") or self.state.title
self.state.clip_time = result.get("clip_time") or self.state.clip_time
if result.get("swap_recommended"):
self.state.swap_recommended = True
warnings = result.get("warnings") or []
if warnings:
self.state.warnings.extend(warnings)
# ------------------------------------------------------------------
# relationships enrichment (Hydrus endpoint + alt metadata)
# ------------------------------------------------------------------
def _enrich_relationships_from_api(self, file_hash: str) -> None:
"""Fetch relationships for the given hash and enrich state's king/alts and alt metadata.
- Uses GET /manage_file_relationships/get_file_relationships?hash=...
- If alts exist, batch-fetch their metadata via GET /get_files/file_metadata?hashes=[...]
- Extracts title, duration, size, and tags (cleaned: "title:" tags keep their namespace, other namespaces are stripped)
"""
base_url = self.hydrus_settings.base_url or ""
access_key = self.hydrus_settings.access_key or ""
if not base_url:
return
try:
client = HydrusClient(base_url, access_key, timeout=self.hydrus_settings.timeout)
except Exception as exc: # pragma: no cover - construction should rarely fail
self.state.warnings.append(f"Hydrus client init failed: {exc}")
return
try:
rel_resp = client.get_file_relationships(file_hash)
except HydrusRequestError as hre: # pragma: no cover - surfaced but non-fatal
self.state.warnings.append(f"relationships api: {hre}")
return
except Exception as exc: # pragma: no cover
self.state.warnings.append(f"relationships api: {exc}")
return
rel_map = rel_resp.get("file_relationships") or {}
rel_obj = None
if isinstance(rel_map, dict):
rel_obj = rel_map.get(file_hash) or next((v for v in rel_map.values() if isinstance(v, dict)), None)
if isinstance(rel_obj, dict):
# Preserve the full relationships object
self.state.relationships = rel_obj
# Update king and alts from canonical fields
king = rel_obj.get("king")
alts = rel_obj.get("3") or []
if isinstance(king, str) and _looks_like_hash(king):
self.state.king = king.lower()
if isinstance(alts, list):
self.state.alts = [h.lower() for h in alts if isinstance(h, str) and _looks_like_hash(h)]
# Fetch alt metadata if we have alts
if not self.state.alts:
return
try:
meta_resp = client.fetch_file_metadata(
hashes=self.state.alts,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_file_urls=False,
include_mime=False,
)
except HydrusRequestError as hre: # pragma: no cover
self.state.warnings.append(f"metadata api: {hre}")
return
except Exception as exc: # pragma: no cover
self.state.warnings.append(f"metadata api: {exc}")
return
if not isinstance(meta_resp, dict):
return
entries = meta_resp.get("metadata") or []
if not isinstance(entries, list):
return
def _extract_tags(meta: Dict[str, Any]) -> list[str]:
tags: list[str] = []
tag_root = meta.get("tags") or meta.get("service_keys_to_statuses_to_tags") or {}
if isinstance(tag_root, dict):
for service_dict in tag_root.values():
if not isinstance(service_dict, dict):
continue
# Prefer storage_tags but fall back to any list values under known keys
storage = service_dict.get("storage_tags")
if isinstance(storage, dict):
for vals in storage.values():
if isinstance(vals, list):
tags.extend([str(t) for t in vals if isinstance(t, str)])
else:
# fall back: inspect lists directly under service_dict
for vals in service_dict.values():
if isinstance(vals, list):
tags.extend([str(t) for t in vals if isinstance(t, str)])
return tags
def _clean_tags_and_title(all_tags: list[str]) -> tuple[Optional[str], list[str]]:
title_val: Optional[str] = None
cleaned: list[str] = []
for tag in all_tags:
if not isinstance(tag, str):
continue
if tag.startswith("title:"):
if title_val is None:
title_val = tag.split(":", 1)[1]
cleaned.append(tag) # keep namespaced title
else:
if ":" in tag:
cleaned.append(tag.split(":", 1)[1])
else:
cleaned.append(tag)
return title_val, cleaned
for meta in entries:
if not isinstance(meta, dict):
continue
h = meta.get("hash")
if not (isinstance(h, str) and _looks_like_hash(h)):
continue
tags_all = _extract_tags(meta)
title_val, tags_clean = _clean_tags_and_title(tags_all)
alt_info = {
"title": title_val,
"duration": meta.get("duration"),
"size": meta.get("size"),
"tags": tags_clean,
}
self.state.relationship_metadata[h.lower()] = alt_info
def _merge_tags(self, tags: Sequence[Any]) -> None:
incoming = _normalise_string_list(tags)
if not incoming:
return
combined = list(self.state.tags or []) + incoming
self.state.tags = unique_preserve_order(combined)
for tag in incoming:
if tag not in self.state.original_tags:
self.state.original_tags[tag] = tag
def _merge_known_urls(self, urls: Optional[Iterable[Any]]) -> None:
if not urls:
return
combined = list(self.state.known_urls or []) + _normalise_string_list(urls)
self.state.known_urls = unique_preserve_order(combined)
def _load_sidecar_tags(self, local_path: str) -> None:
try:
media_path = Path(local_path)
except Exception:
return
if not media_path.exists():
return
candidates = [media_path.with_suffix(".tags"), media_path.with_suffix(".tags.txt")]
for candidate in candidates:
if candidate.exists():
hash_value, tags, known = self._read_sidecar(candidate)
if hash_value and not self.state.hash and _looks_like_hash(hash_value):
self.state.hash = hash_value.lower()
self._merge_tags(tags)
self._merge_known_urls(known)
break
def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
try:
raw = sidecar_path.read_text(encoding="utf-8", errors="ignore")
except OSError:
return None, [], []
hash_value: Optional[str] = None
tags: List[str] = []
known_urls: List[str] = []
for line in raw.splitlines():
trimmed = line.strip()
if not trimmed:
continue
lowered = trimmed.lower()
if lowered.startswith("hash:"):
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
if candidate:
hash_value = candidate
elif lowered.startswith("known_url:") or lowered.startswith("url:"):
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
if candidate:
known_urls.append(candidate)
else:
tags.append(trimmed)
return hash_value, tags, known_urls
def _compute_local_hash(self, local_path: str) -> None:
try:
digest = sha256_file(Path(local_path))
except OSError as exc:
self.state.warnings.append(f"sha256 failed: {exc}")
return
self.state.hash = digest.lower()
# ------------------------------------------------------------------
# finalisation helpers
# ------------------------------------------------------------------
def _finalise(self) -> None:
if self.state.tags:
self.state.tags = unique_preserve_order(self.state.tags)
if self.state.known_urls:
self.state.known_urls = unique_preserve_order(self.state.known_urls)
# Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type
if not self.state.title:
if self.state.metadata.get("title"):
self.state.title = str(self.state.metadata["title"]).strip()
elif self.state.filename:
self.state.title = self.state.filename
if self.state.hash and not _looks_like_hash(self.state.hash):
self.state.hash = None
if self.state.relationship_metadata is None:
self.state.relationship_metadata = {}
if self.state.relationships is not None and not isinstance(self.state.relationships, dict):
self.state.relationships = None
if self.state.original_tags is None:
self.state.original_tags = {}
# ------------------------------------------------------------------
# util helpers
# ------------------------------------------------------------------
@staticmethod
def _normalise_url(value: Any) -> Optional[str]:
if value is None:
return None
text = str(value).strip()
if not text:
return None
return text
@staticmethod
def _resolve_local_path(path: Optional[str]) -> Optional[str]:
if not path:
return None
parsed = urlparse(path)
if parsed.scheme == "file":
decoded = MPVFileBuilder._decode_file_url(path)
return decoded
return path
@staticmethod
def _decode_file_url(value: str) -> Optional[str]:
parsed = urlparse(value)
if parsed.scheme != "file":
return None
netloc = parsed.netloc or ""
path = unquote(parsed.path or "")
if netloc:
path = f"//{netloc}{path}"
if os.name == "nt" and path.startswith("/") and re.match(r"/[A-Za-z]:", path):
path = path[1:]
path = path.replace("/", os.sep)
return path
def _looks_like_hydrus_url(self, url: str) -> bool:
if not url:
return False
if url.startswith("hydrus://"):
return True
if "Hydrus-Client-API-Access-Key=" in url:
return True
base = self.hydrus_settings.base_url
if base and url.startswith(base) and "/get_files/" in url:
return True
return False
@staticmethod
def _extract_hash_from_path(path: Optional[str]) -> Optional[str]:
if not path:
return None
parsed = urlparse(path)
query = parse_qs(parsed.query)
if "hash" in query and query["hash"]:
candidate = query["hash"][0].strip()
if candidate:
return candidate.lower()
match = re.search(r"hash=([0-9a-fA-F]{64})", path)
if match:
return match.group(1).lower()
return None
def build_mpv_file_state(payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
builder = MPVFileBuilder(payload or {}, config or {})
return builder.build()
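# Minimal usage sketch (illustrative only; the payload keys are the ones this builder
# reads, and the config keys shown are assumptions based on the lookups above):
#
#   state = build_mpv_file_state(
#       {"path": "https://example.com/video.mp4", "tags": ["creator:someone"]},
#       {"HydrusNetwork_Access_Key": "...", "Hydrus_Tag_Service": "my tags"},
#   )
#   state["type"]        # "remote" (or "hydrus" / "local" / "unknown")
#   state.get("tags")    # merged, de-duplicated tag list, if any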

143
helper/progress.py Normal file
View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""Text-based progress bar utilities for consistent display across all downloads."""
import sys
from helper.logger import log, debug
def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
"""Create a text-based progress bar.
Args:
current: Current progress (bytes/items)
total: Total to complete (bytes/items)
width: Width of the bar in characters (default 40)
label: Optional label prefix
Returns:
Formatted progress bar string
Examples:
format_progress_bar(50, 100)
# Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"
format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
# Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
"""
if total <= 0:
percentage = 0
filled = 0
else:
percentage = (current / total) * 100
filled = int((current / total) * width)
# Create bar: filled blocks + empty blocks
bar = "" * filled + "" * (width - filled)
# Format percentage
pct_str = f"{percentage:.1f}%"
# Build result
if label:
result = f"{label}: [{bar}] {pct_str}"
else:
result = f"[{bar}] {pct_str}"
return result
def format_size(bytes_val: float) -> str:
"""Format bytes to human-readable size.
Examples:
format_size(1024) -> "1.00 KB"
format_size(1024*1024) -> "1.00 MB"
format_size(1024*1024*1024) -> "1.00 GB"
"""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if bytes_val < 1024:
return f"{bytes_val:.2f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.2f} PB"
def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
"""Format download status with progress bar and details.
Args:
filename: Name of file being downloaded
current: Current bytes downloaded
total: Total file size
speed: Download speed in bytes/sec
Returns:
Formatted status line
Examples:
format_download_status("movie.mkv", 512*1024*1024, 2*1024*1024*1024, 10*1024*1024)
# Returns: "movie.mkv: [████████████░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0% (512.00 MB / 2.00 GB @ 10.00 MB/s)"
"""
bar = format_progress_bar(current, total, width=30)
size_current = format_size(current)
size_total = format_size(total)
if speed > 0:
speed_str = f" @ {format_size(speed)}/s"
else:
speed_str = ""
return f"{bar} ({size_current} / {size_total}{speed_str})"
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
"""Print download progress to stderr (doesn't interfere with piped output).
Args:
filename: File being downloaded
current: Current bytes
total: Total bytes
speed: Speed in bytes/sec
end: Line ending (default "\r" for overwriting, use "\n" for final)
"""
status = format_download_status(filename, current, total, speed)
debug(status, end=end, flush=True)
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
"""Print final progress line (100%) with time elapsed.
Args:
filename: File that was downloaded
total: Total size
elapsed: Time elapsed in seconds
"""
bar = format_progress_bar(total, total, width=30)
size_str = format_size(total)
# Format elapsed time
if elapsed < 60:
time_str = f"{elapsed:.1f}s"
elif elapsed < 3600:
minutes = elapsed / 60
time_str = f"{minutes:.1f}m"
else:
hours = elapsed / 3600
time_str = f"{hours:.2f}h"
debug(f"{bar} ({size_str}) - {time_str}")
if __name__ == "__main__":
# Demo
import time
log("Progress Bar Demo:", file=sys.stderr)
# Demo 1: Simple progress
for i in range(101):
print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
time.sleep(0.02)
print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
log()

159
helper/query_parser.py Normal file
View File

@@ -0,0 +1,159 @@
"""Dynamic query parser for filtering and field extraction.
Supports query syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma"
- year:2010
- isbn:0557677203 author:"Albert Pike"
- Mixed with free text: "Morals" isbn:0557677203
This allows flexible query strings that can be parsed by any search provider
to extract specific fields for filtering and searching.
"""
from typing import Dict, List, Tuple, Optional, Any
import re
def parse_query(query: str) -> Dict[str, Any]:
"""Parse a query string into field:value pairs and free text.
Args:
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
Returns:
Dictionary with:
- 'fields': Dict[field_name, field_value] for structured fields
- 'text': str with remaining free text
- 'raw': str original query
"""
result = {
'fields': {},
'text': '',
'raw': query,
}
if not query or not query.strip():
return result
query = query.strip()
remaining_parts = []
# Pattern to match: field:value or field:"quoted value"
# Matches: word: followed by either quoted string or unquoted word
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
pos = 0
for match in re.finditer(pattern, query):
# Add any text before this match
if match.start() > pos:
before_text = query[pos:match.start()].strip()
if before_text:
remaining_parts.append(before_text)
field_name = match.group(1).lower()
field_value = match.group(2) if match.group(2) is not None else match.group(3)
result['fields'][field_name] = field_value
pos = match.end()
# Add any remaining text after last match
if pos < len(query):
remaining_text = query[pos:].strip()
if remaining_text:
remaining_parts.append(remaining_text)
result['text'] = ' '.join(remaining_parts)
return result
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
"""Get a field value from parsed query, with optional default.
Args:
parsed_query: Result from parse_query()
field_name: Field name to look up (case-insensitive)
default: Default value if field not found
Returns:
Field value or default
"""
return parsed_query.get('fields', {}).get(field_name.lower(), default)
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
"""Check if a field exists in parsed query.
Args:
parsed_query: Result from parse_query()
field_name: Field name to check (case-insensitive)
Returns:
True if field exists
"""
return field_name.lower() in parsed_query.get('fields', {})
def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free text portion of a parsed query.
Args:
parsed_query: Result from parse_query()
Returns:
Free text or empty string
"""
return parsed_query.get('text', '')
def build_query_for_provider(
parsed_query: Dict[str, Any],
provider: str,
extraction_map: Optional[Dict[str, str]] = None
) -> Tuple[str, Dict[str, str]]:
"""Build a search query and filters dict for a specific provider.
Different providers have different search syntax. This function
extracts the appropriate fields for each provider.
Args:
parsed_query: Result from parse_query()
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
extraction_map: Optional mapping of field names to provider-specific names
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
Returns:
Tuple of (search_query: str, extracted_fields: Dict[field, value])
"""
extraction_map = extraction_map or {}
extracted = {}
free_text = get_free_text(parsed_query)
# Extract fields based on map
for field_name, provider_key in extraction_map.items():
if has_field(parsed_query, field_name):
extracted[provider_key] = get_field(parsed_query, field_name)
# If provider-specific extraction needed, providers can implement it
# For now, return the free text as query
return free_text, extracted
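# Worked example (illustrative; the extraction map is an assumption):
#   parsed = parse_query('isbn:0557677203 title:"Morals and Dogma" free text')
#   build_query_for_provider(parsed, 'libgen', {'isbn': 'isbn', 'title': 'title'})
#   # -> ('free text', {'isbn': '0557677203', 'title': 'Morals and Dogma'})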
if __name__ == '__main__':
# Test cases
test_queries = [
'isbn:0557677203',
'isbn:0557677203 author:"Albert Pike"',
'Morals and Dogma isbn:0557677203',
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
'search term without fields',
'author:"John Smith" title:"A Book"',
]
for query in test_queries:
print(f"\nQuery: {query}")
parsed = parse_query(query)
print(f" Fields: {parsed['fields']}")
print(f" Text: {parsed['text']}")

1777
helper/search_provider.py Normal file

File diff suppressed because it is too large Load Diff

155
helper/tasks.py Normal file
View File

@@ -0,0 +1,155 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
import os
import socket
import subprocess
import sys
from helper.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == 'nt':
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, 'r+b', buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc: # Pipe busy
if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
else:
sock = socket.socket(socket.AF_UNIX)
while True:
try:
sock.connect(path)
return sock.makefile('r+b', buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc:
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
"""Create a helper function for sending script messages via IPC."""
if ipc is None:
def _noop(_event: str, _payload: dict) -> None:
return None
return _noop
lock = threading.Lock()
def _send(event: str, payload: dict) -> None:
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
encoded = message.encode('utf-8') + b'\n'
with lock:
try:
ipc.write(encoded)
ipc.flush()
except OSError:
pass
return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
for raw in stream:
yield raw.rstrip('\r\n')
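# Usage sketch (illustrative; the IPC path is an assumption, the event name matches
# the one used by _run_task below):
#   ipc = connect_ipc(r"\\.\pipe\mpv-ipc" if os.name == "nt" else "/tmp/mpv.sock")
#   notify = ipc_sender(ipc)
#   notify("downlow-task-event", {"id": "task-1", "event": "start"})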
def _run_task(args, parser) -> int:
if not args.command:
parser.error('run-task requires a command to execute (use "--" before the command).')
env = os.environ.copy()
for entry in args.env:
key, sep, value = entry.partition('=')
if not sep:
parser.error(f'Invalid environment variable definition: {entry!r}')
env[key] = value
command = list(args.command)
if command and command[0] == '--':
command.pop(0)
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
if not command:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': 'No command provided after separator',
})
log('[downlow.py] No command provided for run-task', file=sys.stderr)
return 1
if command and isinstance(command[0], str) and sys.executable:
first = command[0].lower()
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
command[0] = sys.executable
if os.environ.get('DOWNLOW_DEBUG'):
log(f"Launching command: {command}", file=sys.stderr)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'start',
'command': command,
'cwd': args.cwd or os.getcwd(),
})
try:
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=args.cwd or None,
env=env,
text=True,
bufsize=1,
universal_newlines=True,
)
except FileNotFoundError as exc:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': f'Executable not found: {exc.filename}',
})
log(f"{exc}", file=sys.stderr)
return 1
stdout_lines: list[str] = []
stderr_lines: list[str] = []
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
for line in iter_stream(stream):
sink.append(line)
notifier('downlow-task-event', {
'id': args.task_id,
'event': label,
'line': line,
})
threads = []
if process.stdout:
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
t_out.start()
threads.append(t_out)
if process.stderr:
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
t_err.start()
threads.append(t_err)
return_code = process.wait()
for t in threads:
t.join(timeout=0.1)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'exit',
'returncode': return_code,
'success': return_code == 0,
})
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
if stdout_lines:
log('\n'.join(stdout_lines))
if stderr_lines:
log('\n'.join(stderr_lines), file=sys.stderr)
return return_code

View File

@@ -0,0 +1,706 @@
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
This module provides a single interface for downloading books from multiple sources:
1. Try Archive.org direct download (if available)
2. Try Archive.org borrowing (if user has credentials)
3. Fallback to Libgen search by ISBN
4. Attempt Libgen download
All sources integrated with proper metadata scraping and error handling.
"""
import logging
import asyncio
import requests
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
from pathlib import Path
from helper.logger import debug
logger = logging.getLogger(__name__)
class UnifiedBookDownloader:
"""Unified interface for downloading books from multiple sources."""
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
"""Initialize the unified book downloader.
Args:
config: Configuration dict with credentials
output_dir: Default output directory
"""
self.config = config or {}
self.output_dir = output_dir
self.session = requests.Session()
# Import download functions from their modules
self._init_downloaders()
def _init_downloaders(self) -> None:
"""Initialize downloader functions from their modules."""
try:
from helper.archive_client import (
check_direct_download,
get_openlibrary_by_isbn,
loan
)
self.check_direct_download = check_direct_download
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
self.loan_func = loan
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
self.check_direct_download = None
self.get_openlibrary_by_isbn = None
self.loan_func = None
try:
from helper.libgen_service import (
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
download_from_mirror as _libgen_download,
search_libgen as _libgen_search,
)
def _log_info(message: str) -> None:
debug(f"[UnifiedBookDownloader] {message}")
def _log_error(message: str) -> None:
logger.error(f"[UnifiedBookDownloader] {message}")
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
query,
limit=limit,
log_info=_log_info,
log_error=_log_error,
)
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
mirror_url,
output_path,
log_info=_log_info,
log_error=_log_error,
)
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
self.search_libgen = None
self.download_from_mirror = None
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get all available download options for a book.
Checks in priority order:
1. Archive.org direct download (public domain)
2. Archive.org borrowing (if credentials available and book is borrowable)
3. Libgen fallback (by ISBN)
Args:
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
Returns:
Dict with available download methods and metadata
"""
options = {
'book_title': book_data.get('title', 'Unknown'),
'book_author': book_data.get('author', 'Unknown'),
'isbn': book_data.get('isbn', ''),
'openlibrary_id': book_data.get('openlibrary_id', ''),
'methods': [], # Will be sorted by priority
'metadata': {}
}
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
ol_id = book_data.get('openlibrary_id', '')
book_id = None
if ol_id.startswith('OL') and len(ol_id) > 2:
# Remove 'OL' prefix (keep everything after it including the suffix letter)
# The book_id is all digits after 'OL'
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
# PRIORITY 1: Check direct download (fastest, no auth needed)
if self.check_direct_download and book_id:
try:
can_download, pdf_url = self.check_direct_download(book_id)
if can_download:
options['methods'].append({
'type': 'archive.org_direct',
'label': 'Archive.org Direct Download',
'requires_auth': False,
'pdf_url': pdf_url,
'book_id': book_id,
'priority': 1 # Highest priority
})
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
# First verify the book is actually lendable via OpenLibrary API
if self._has_archive_credentials():
is_lendable, status = self._check_book_lendable_status(ol_id)
if is_lendable:
options['methods'].append({
'type': 'archive.org_borrow',
'label': 'Archive.org Borrow',
'requires_auth': True,
'book_id': book_id,
'priority': 2 # Second priority
})
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
else:
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
isbn = book_data.get('isbn', '')
title = book_data.get('title', '')
author = book_data.get('author', '')
if self.search_libgen:
# Can use Libgen if we have ISBN OR title (or both)
if isbn or title:
options['methods'].append({
'type': 'libgen',
'label': 'Libgen Search & Download',
'requires_auth': False,
'isbn': isbn,
'title': title,
'author': author,
'priority': 3 # Third priority (fallback)
})
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
# Sort by priority (higher priority first)
options['methods'].sort(key=lambda x: x.get('priority', 999))
return options
def _has_archive_credentials(self) -> bool:
"""Check if Archive.org credentials are available."""
try:
from helper.archive_client import credential_openlibrary
email, password = credential_openlibrary(self.config)
return bool(email and password)
except Exception:
return False
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
"""Check if a book is lendable via OpenLibrary API.
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
Args:
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
Returns:
Tuple of (is_lendable: bool, status_reason: Optional[str])
"""
try:
if not ol_id.startswith('OL'):
return False, "Invalid OpenLibrary ID format"
# If this is a Work ID (ends with W), we can't query Volumes API
# Work IDs are abstract umbrella records, not specific editions
if ol_id.endswith('W'):
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
return False, "Work ID not supported by Volumes API (not a specific edition)"
# If it ends with M, it's an Edition ID - proceed with query
if not ol_id.endswith('M'):
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
return False, "Invalid OpenLibrary ID type"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
response = self.session.get(url, timeout=10)
response.raise_for_status()
data = response.json()
# Empty response means no records found
if not data:
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
return False, "No availability data found"
# The response is wrapped in OLID key
olid_key = f"OLID:{ol_id}"
if olid_key not in data:
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
return False, "No availability data found"
olid_data = data[olid_key]
# Check items array for lendable status
if 'items' in olid_data and olid_data['items'] and len(olid_data['items']) > 0:
items = olid_data['items']
# Check the first item for lending status
first_item = items[0]
# Handle both dict and string representations (PowerShell converts to string)
if isinstance(first_item, dict):
status = first_item.get('status', '')
else:
# String representation - check if 'lendable' is in it
status = str(first_item).lower()
is_lendable = 'lendable' in str(status).lower()
if is_lendable:
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
return True, "LENDABLE"
else:
status_str = str(status) if status else 'NOT_LENDABLE'
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
return False, status_str
else:
# No items array or empty
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
return False, "Not available for lending"
except requests.exceptions.Timeout:
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
return False, "API timeout"
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
return False, f"API error"
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
"""Download a book using the specified method.
Args:
method: Download method dict from get_download_options()
output_dir: Directory to save the book
Returns:
Tuple of (success: bool, message: str)
"""
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
method_type = method.get('type', '')
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
try:
if method_type == 'archive.org_direct':
return await self._download_archive_direct(method, output_dir)
elif method_type == 'archive.org_borrow':
return await self._download_archive_borrow(method, output_dir)
elif method_type == 'libgen':
return await self._download_libgen(method, output_dir)
else:
return False, f"Unknown download method: {method_type}"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
return False, f"Download failed: {str(e)}"
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download directly from Archive.org."""
try:
pdf_url = method.get('pdf_url', '')
book_id = method.get('book_id', '')
if not pdf_url:
return False, "No PDF URL available"
# Determine output filename
filename = f"{book_id}.pdf"
output_path = Path(output_dir) / filename
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
# Download in a thread to avoid blocking
loop = asyncio.get_event_loop()
success = await loop.run_in_executor(
None,
self._download_file,
pdf_url,
str(output_path)
)
if success:
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
return True, f"Downloaded to: {output_path}"
else:
return False, "Failed to download PDF"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
return False, f"Archive download failed: {str(e)}"
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Archive.org borrowing (requires credentials).
Process (follows archive_client.py pattern):
1. Login to Archive.org with credentials
2. Call loan endpoint to borrow the book (14-day loan)
3. Get book info (page links, metadata)
4. Download all pages as images
5. Merge images into PDF
The loan function from archive_client.py handles:
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
- Creating borrow token for access
- Handling borrow failures
get_book_infos() extracts page links from the borrowed book viewer
download() downloads all pages using thread pool
img2pdf merges pages into searchable PDF
"""
try:
from helper.archive_client import credential_openlibrary
book_id = method.get('book_id', '')
# Get credentials
email, password = credential_openlibrary(self.config)
if not email or not password:
return False, "Archive.org credentials not configured"
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
# Login and borrow (in thread, following download_book.py pattern)
loop = asyncio.get_event_loop()
borrow_result = await loop.run_in_executor(
None,
self._archive_borrow_and_download,
email,
password,
book_id,
output_dir
)
if borrow_result and isinstance(borrow_result, tuple):
success, filepath = borrow_result
if success:
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
return True, filepath
else:
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
return False, filepath
else:
return False, "Failed to borrow book from Archive.org"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Archive borrow failed: {str(e)}"
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Libgen search and download with mirror fallback."""
try:
isbn = method.get('isbn', '')
title = method.get('title', '')
if not isbn and not title:
return False, "Need ISBN or title for Libgen search"
if not self.search_libgen:
return False, "Libgen searcher not available"
# Define wrapper functions to safely call the methods
search_func = self.search_libgen
if search_func is None:
return False, "Search function not available"
preloaded_results = method.get('results')
loop = asyncio.get_event_loop()
if preloaded_results:
results = list(preloaded_results)
if not results:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
else:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
if not results:
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
return False, f"No Libgen results found for: {isbn or title}"
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
# Determine output filename (use first result for naming)
first_result = results[0]
filename = f"{first_result.get('title', 'book')}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
# Try each result's mirror until one succeeds
for idx, result in enumerate(results, 1):
mirror_url = result.get('mirror_url', '')
if not mirror_url:
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
continue
# Use extension from this result if available
extension = result.get('extension', 'pdf')
if extension and not extension.startswith('.'):
extension = f".{extension}"
elif not extension:
extension = '.pdf'
output_path = Path(output_dir) / (filename + extension)
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
download_func = self.download_from_mirror
if download_func is None:
return False, "Download function not available"
download_callable = cast(Callable[[str, str], bool], download_func)
def download_wrapper():
return download_callable(mirror_url, str(output_path))
# Download (in thread)
try:
success = await loop.run_in_executor(None, download_wrapper)
if success:
# Validate downloaded file is not HTML (common Libgen issue)
if output_path.exists():
try:
with open(output_path, 'rb') as f:
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
if '<!doctype' in file_start or '<html' in file_start:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
output_path.unlink() # Delete the HTML file
continue
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
return True, str(output_path)
else:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
continue
return False, f"All {len(results)} mirrors failed"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
return False, f"Libgen download failed: {str(e)}"
async def download_libgen_selection(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Download a specific Libgen result with optional fallbacks."""
if not isinstance(selected, dict):
return False, "Selected result must be a dictionary"
ordered_results: List[Dict[str, Any]] = [selected]
if remaining:
for item in remaining:
if isinstance(item, dict) and item is not selected:
ordered_results.append(item)
method: Dict[str, Any] = {
'type': 'libgen',
'isbn': selected.get('isbn', '') or '',
'title': selected.get('title', '') or '',
'author': selected.get('author', '') or '',
'results': ordered_results,
}
return await self.download_book(method, output_dir)
def download_libgen_selection_sync(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Synchronous helper for downloading a Libgen selection."""
async def _run() -> Tuple[bool, str]:
return await self.download_libgen_selection(selected, remaining, output_dir)
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return loop.run_until_complete(_run())
finally:
loop.close()
asyncio.set_event_loop(None)
def _download_file(self, url: str, output_path: str) -> bool:
"""Download a file from URL."""
try:
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
with open(output_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
return True
except Exception as e:
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
return False
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
"""Borrow a book from Archive.org and download pages as PDF.
This follows the exact process from archive_client.py:
1. Login with credentials
2. Call loan() to create 14-day borrow
3. Get book info (extract page URLs)
4. Download all pages as images
5. Merge images into searchable PDF
Returns tuple of (success: bool, filepath/message: str)
"""
try:
from helper.archive_client import login, loan, get_book_infos, download
import tempfile
import shutil
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
session = login(email, password)
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
# Call loan to create the 14-day borrow
session = loan(session, book_id, verbose=True)
# If we get here, borrowing succeeded
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
# Now get the book info (page URLs and metadata)
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
# Try both URL formats: with /borrow and without
book_urls = [
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
f"https://archive.org/details/{book_id}" # Fallback to details page
]
title = None
links = None
metadata = None
last_error = None
for book_url in book_urls:
try:
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
response = session.get(book_url, timeout=10)
# Log response status
if response.status_code != 200:
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
# Continue to try next URL
continue
# Try to parse the response
title, links, metadata = get_book_infos(session, book_url)
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
break
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
last_error = e
continue
if links is None:
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
# Borrow extraction failed - return False
return False, "Could not extract borrowed book pages"
# Create temporary directory for images
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
try:
# Download all pages (uses thread pool)
images = download(
session=session,
n_threads=10,
directory=temp_dir,
links=links,
scale=3, # Default resolution
book_id=book_id
)
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
# Try to merge pages into PDF
try:
import img2pdf
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
# Prepare PDF metadata
pdfmeta = {}
if metadata:
if "title" in metadata:
pdfmeta["title"] = metadata["title"]
if "creator" in metadata:
pdfmeta["author"] = metadata["creator"]
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
pdfmeta["creationdate"] = None # Avoid timezone issues
# Convert images to PDF
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
if not pdf_content:
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
return False, "Failed to convert pages to PDF"
# Save the PDF
pdf_filename = f"{title}.pdf" if title else "book.pdf"
pdf_path = Path(output_dir) / pdf_filename
# Handle duplicate filenames
i = 1
while pdf_path.exists():
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
i += 1
with open(pdf_path, 'wb') as f:
f.write(pdf_content)
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
return True, str(pdf_path)
except ImportError:
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
# Create JPG collection directory
if not title:
title = f"book_{book_id}"
jpg_dir = Path(output_dir) / title
i = 1
while jpg_dir.exists():
jpg_dir = Path(output_dir) / f"{title}({i})"
i += 1
# Move temporary directory to final location
shutil.move(temp_dir, str(jpg_dir))
temp_dir = None # Mark as already moved
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
return True, str(jpg_dir)
finally:
# Clean up temporary directory if it still exists
if temp_dir and Path(temp_dir).exists():
shutil.rmtree(temp_dir)
except SystemExit:
# loan() function calls sys.exit on failure - catch it
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
return False, "Book could not be borrowed (may not be available for borrowing)"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Borrow failed: {str(e)}"
def close(self) -> None:
"""Close the session."""
self.session.close()
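# Illustrative sketch (not part of the class above): the same borrow-and-merge
# flow driven directly through helper.archive_client, mirroring the method
# above. The credentials, book id and output path are placeholders, and
# img2pdf is assumed to be installed.
def _example_borrow_to_pdf() -> None:
    from helper.archive_client import login, loan, get_book_infos, download
    import img2pdf
    session = login("user@example.com", "hunter2")
    session = loan(session, "examplebookid", verbose=True)
    title, links, metadata = get_book_infos(session, "https://archive.org/borrow/examplebookid")
    images = download(session=session, n_threads=10, directory="/tmp/pages",
                      links=links, scale=3, book_id="examplebookid")
    with open(f"{title}.pdf", "wb") as fh:
        fh.write(img2pdf.convert(images, title=metadata.get("title")))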

492
helper/utils.py Normal file
View File

@@ -0,0 +1,492 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations
import json
import hashlib
import ffmpeg
import base64
import logging
import time
from pathlib import Path
from typing import Any, Iterable
from datetime import datetime
from dataclasses import dataclass, field
from fnmatch import fnmatch
from urllib.parse import urlparse
import helper.utils_constant
try:
import cbor2
except ImportError:
cbor2 = None # type: ignore
CHUNK_SIZE = 1024 * 1024 # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
"""Ensure *path* exists as a directory."""
try:
path.mkdir(parents=True, exist_ok=True)
except OSError as exc: # pragma: no cover - surfaced to caller
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
def unique_path(path: Path) -> Path:
"""Return a unique path by appending " (n)" if needed."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
candidate = parent / f"{stem} ({counter}){suffix}"
if not candidate.exists():
return candidate
counter += 1
def sanitize_metadata_value(value: Any) -> str | None:
if value is None:
return None
if not isinstance(value, str):
value = str(value)
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
if not value:
return None
return value
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
for value in values:
if value not in seen:
seen.add(value)
ordered.append(value)
return ordered
def sha256_file(file_path: Path) -> str:
"""Return the SHA-256 hex digest of *path*."""
hasher = hashlib.sha256()
with file_path.open('rb') as handle:
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
hasher.update(chunk)
return hasher.hexdigest()
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
"""Create a .metadata sidecar file with JSON metadata.
The metadata dict should contain title. If not present, it will be derived from
the filename. This ensures the .metadata file can be matched during batch import.
Args:
file_path: Path to the exported file
metadata: Dictionary of metadata to save
"""
if not metadata:
return
file_name = file_path.stem
file_ext = file_path.suffix.lower()
# Ensure metadata has a title field that matches the filename (without extension)
# This allows the sidecar to be matched and imported properly during batch import
if 'title' not in metadata or not metadata.get('title'):
metadata['title'] = file_name
metadata['hash'] = sha256_file(file_path)
metadata['size'] = Path(file_path).stat().st_size
format_found = False
for mime_type, ext_map in helper.utils_constant.mime_maps.items():
for key, info in ext_map.items():
if info.get("ext") == file_ext:
metadata['type'] = mime_type
format_found = True
break
if format_found:
break
else:
metadata['type'] = 'unknown'
    # ffprobe only succeeds for media inputs; treat probe errors for
    # documents/archives as non-fatal so the sidecar is still written.
    try:
        metadata.update(ffprobe(str(file_path)))
    except Exception:
        pass
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
try:
with open(metadata_path, 'w', encoding='utf-8') as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
except OSError as exc:
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
def create_tags_sidecar(file_path: Path, tags: set) -> None:
"""Create a .tags sidecar file with tags (one per line).
Args:
file_path: Path to the exported file
tags: Set of tag strings
"""
if not tags:
return
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
try:
with open(tags_path, 'w', encoding='utf-8') as f:
for tag in sorted(tags):
f.write(f"{tag}\n")
except Exception as e:
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
def ffprobe(file_path: str) -> dict:
    """Probe *file_path* with ffmpeg and return a flat dict of format/stream metadata."""
probe = ffmpeg.probe(file_path)
metadata = {}
# Format-level info
fmt = probe.get("format", {})
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
metadata["format_name"] = fmt.get("format_name", None)
# Stream-level info
for stream in probe.get("streams", []):
codec_type = stream.get("codec_type")
if codec_type == "audio":
metadata["audio_codec"] = stream.get("codec_name")
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
elif codec_type == "video":
metadata["video_codec"] = stream.get("codec_name")
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
elif codec_type == "image":
metadata["image_codec"] = stream.get("codec_name")
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
return metadata
# ============================================================================
# CBOR Utilities - Consolidated from cbor.py
# ============================================================================
"""CBOR utilities backed by the `cbor2` library."""
def decode_cbor(data: bytes) -> Any:
"""Decode *data* from CBOR into native Python objects."""
if not data:
return None
if cbor2 is None:
raise ImportError("cbor2 library is required for CBOR decoding")
return cbor2.loads(data)
def jsonify(value: Any) -> Any:
"""Convert *value* into a JSON-friendly structure."""
if isinstance(value, dict):
return {str(key): jsonify(val) for key, val in value.items()}
if isinstance(value, list):
return [jsonify(item) for item in value]
if isinstance(value, bytes):
return {"__bytes__": base64.b64encode(value).decode("ascii")}
return value
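# Quick sketch of the CBOR helpers: decode a CBOR payload (requires cbor2) and
# make it JSON-serialisable, with raw bytes wrapped as base64. The blob
# argument is a placeholder.
def _example_cbor_roundtrip(blob: bytes) -> str:
    payload = decode_cbor(blob)
    return json.dumps(jsonify(payload), ensure_ascii=False)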
# ============================================================================
# Format Utilities - Consolidated from format_utils.py
# ============================================================================
"""Formatting utilities for displaying metadata consistently across the application."""
def format_bytes(bytes_value) -> str:
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
Args:
bytes_value: Size in bytes (int or float)
Returns:
Formatted string like '1.5 MB' or '756 MB'
"""
    if bytes_value is None or (isinstance(bytes_value, (int, float)) and bytes_value <= 0):
return "0 B"
if isinstance(bytes_value, (int, float)):
for unit in ("B", "KB", "MB", "GB", "TB"):
if bytes_value < 1024:
if unit == "B":
return f"{int(bytes_value)} {unit}"
return f"{bytes_value:.1f} {unit}"
bytes_value /= 1024
return f"{bytes_value:.1f} PB"
return str(bytes_value)
def format_duration(seconds) -> str:
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string like '1:23:45' or '5:30'
"""
if seconds is None or seconds == '':
return "N/A"
if isinstance(seconds, str):
try:
seconds = float(seconds)
except ValueError:
return str(seconds)
if not isinstance(seconds, (int, float)):
return str(seconds)
total_seconds = int(seconds)
if total_seconds < 0:
return "N/A"
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
elif minutes > 0:
return f"{minutes}:{secs:02d}"
else:
return f"{secs}s"
def format_timestamp(timestamp_str) -> str:
"""Format ISO timestamp to readable format.
Args:
timestamp_str: ISO format timestamp string or None
Returns:
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
"""
if not timestamp_str:
return "N/A"
try:
# Handle ISO format timestamps
if isinstance(timestamp_str, str):
# Try parsing ISO format
if 'T' in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
else:
# Try other common formats
dt = datetime.fromisoformat(timestamp_str)
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception as e:
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
return str(timestamp_str)
def format_metadata_value(key: str, value) -> str:
"""Format a metadata value based on its key for display.
This is the central formatting rule for all metadata display.
Args:
key: Metadata field name
value: Value to format
Returns:
Formatted string for display
"""
if value is None or value == '':
return "N/A"
# Apply field-specific formatting
if key in ('size', 'file_size'):
return format_bytes(value)
elif key in ('duration', 'length'):
return format_duration(value)
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
return format_timestamp(value)
else:
return str(value)
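# Display-formatting sketch: the same field-aware rule used by the helpers
# above, applied to a sample record (values are placeholders).
def _example_format_record() -> None:
    record = {"size": 1536000, "duration": 5025, "time_imported": "2025-10-28T19:36:01"}
    for key, value in record.items():
        print(f"{key}: {format_metadata_value(key, value)}")
    # size: 1.5 MB, duration: 1:23:45, time_imported: 2025-10-28 19:36:01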
# ============================================================================
# Link Utilities - Consolidated from link_utils.py
# ============================================================================
"""Link utilities - Extract and process URLs from various sources."""
def extract_link_from_args(args: Iterable[str]) -> Any | None:
"""Extract HTTP/HTTPS URL from command arguments.
Args:
args: Command arguments
Returns:
URL string if found, None otherwise
"""
args_list = list(args) if not isinstance(args, (list, tuple)) else args
    if not args_list:
return None
potential_link = str(args_list[0])
if potential_link.startswith(('http://', 'https://')):
return potential_link
return None
def extract_link_from_result(result: Any) -> Any | None:
"""Extract URL from a result object (dict or object with attributes).
Args:
result: Result object from pipeline (dict or object)
Returns:
URL string if found, None otherwise
"""
if isinstance(result, dict):
return result.get('url') or result.get('link') or result.get('href')
return (
getattr(result, 'url', None) or
getattr(result, 'link', None) or
getattr(result, 'href', None)
)
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
"""Extract link from args or result (args take priority).
Args:
result: Pipeline result object
args: Command arguments
Returns:
URL string if found, None otherwise
"""
# Try args first
link = extract_link_from_args(args)
if link:
return link
# Fall back to result
return extract_link_from_result(result)
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
"""Get API key from config with fallback support.
Args:
config: Configuration dictionary
service: Service name for logging
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
Returns:
API key if found and not empty, None otherwise
"""
try:
parts = key_path.split('.')
value = config
for part in parts:
if isinstance(value, dict):
value = value.get(part)
else:
return None
if isinstance(value, str):
return value.strip() or None
return None
except Exception:
return None
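# Sketch of the dot-notation lookup performed by get_api_key; the config shape
# is an assumption based on the "Debrid.All-debrid" example in the docstring.
def _example_read_api_key() -> None:
    config = {"Debrid": {"All-debrid": "abc123"}}
    key = get_api_key(config, "alldebrid", "Debrid.All-debrid")
    assert key == "abc123"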
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
"""Add direct link information to result object.
Args:
result: Result object to modify (dict or object)
direct_link: The unlocked/direct URL
original_link: The original restricted URL
"""
if isinstance(result, dict):
result['direct_link'] = direct_link
result['original_link'] = original_link
else:
setattr(result, 'direct_link', direct_link)
setattr(result, 'original_link', original_link)
# ============================================================================
# URL Policy Resolution - Consolidated from url_parser.py
# ============================================================================
"""URL policy resolution for downlow workflows."""
@dataclass(slots=True)
class UrlPolicy:
"""Describe how a URL should be handled by download and screenshot flows."""
skip_download: bool = False
skip_metadata: bool = False
force_screenshot: bool = False
extra_tags: list[str] = field(default_factory=list)
def apply_tags(self, sources: Iterable[str]) -> list[str]:
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
for value in sources:
text = str(value).strip()
if text:
tags.append(text)
return tags
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
if not pattern:
return None
skip_download = bool(rule.get("skip_download"))
skip_metadata = bool(rule.get("skip_metadata"))
force_screenshot = bool(rule.get("force_screenshot"))
extra_tags_raw = rule.get("extra_tags")
if isinstance(extra_tags_raw, str):
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
elif isinstance(extra_tags_raw, (list, tuple, set)):
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
else:
extra_tags = []
return {
"pattern": pattern,
"skip_download": skip_download,
"skip_metadata": skip_metadata,
"force_screenshot": force_screenshot,
"extra_tags": extra_tags,
}
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
policies_raw = config.get("url_policies")
if not policies_raw:
return UrlPolicy()
if not isinstance(policies_raw, list):
return UrlPolicy()
parsed = urlparse(url)
subject = f"{parsed.netloc}{parsed.path}"
host = parsed.netloc
resolved = UrlPolicy()
for rule_raw in policies_raw:
if not isinstance(rule_raw, dict):
continue
rule = _normalise_rule(rule_raw)
if rule is None:
continue
pattern = rule["pattern"]
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
continue
if rule["skip_download"]:
resolved.skip_download = True
if rule["skip_metadata"]:
resolved.skip_metadata = True
if rule["force_screenshot"]:
resolved.force_screenshot = True
if rule["extra_tags"]:
for tag in rule["extra_tags"]:
if tag not in resolved.extra_tags:
resolved.extra_tags.append(tag)
return resolved
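# Sketch of a url_policies config entry and how resolve_url_policy applies it;
# the host pattern and tags below are placeholders.
def _example_url_policy() -> None:
    config = {
        "url_policies": [
            {"pattern": "*.example.com", "skip_download": True, "extra_tags": "source:example, nsfw"},
        ]
    }
    policy = resolve_url_policy(config, "https://media.example.com/watch?v=1")
    assert policy.skip_download is True
    assert policy.extra_tags == ["source:example", "nsfw"]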

79
helper/utils_constant.py Normal file
View File

@@ -0,0 +1,79 @@
mime_maps = {
"image": {
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
"png": { "ext": ".png", "mimes": ["image/png"] },
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
},
"image_sequence": {
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
},
"video": {
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
},
"audio": {
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
},
"document": {
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
},
"archive": {
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
},
"project": {
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
},
"other": {
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
"json": { "ext": ".json", "mimes": ["application/json"] },
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
}
}
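# Illustrative reverse lookup over mime_maps: resolve a file extension to its
# top-level type and canonical MIME, mirroring how create_metadata_sidecar
# walks the table. The helper name exists only for demonstration.
def example_type_for_extension(ext):
    for top_level, ext_map in mime_maps.items():
        for info in ext_map.values():
            if info.get("ext") == ext.lower():
                return top_level, info["mimes"][0]
    return None
# example_type_for_extension(".flac") -> ("audio", "audio/flac")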

655
helper/worker_manager.py Normal file
View File

@@ -0,0 +1,655 @@
"""Worker task management with persistent database storage.
Manages worker tasks for downloads, searches, imports, etc. with automatic
persistence to database and optional auto-refresh callbacks.
"""
import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable
from datetime import datetime
from threading import Thread, Lock
import time
from .local_library import LocalLibraryDB
from helper.logger import log
logger = logging.getLogger(__name__)
class Worker:
"""Represents a single worker task with state management."""
def __init__(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", manager: Optional['WorkerManager'] = None):
"""Initialize a worker.
Args:
worker_id: Unique identifier for this worker
worker_type: Type of work (e.g., 'download', 'search', 'import')
title: Human-readable title
description: Detailed description
manager: Reference to parent WorkerManager for state updates
"""
self.id = worker_id
self.type = worker_type
self.title = title or worker_type
self.description = description
self.manager = manager
self.status = "running"
self.progress = ""
self.details = ""
self.error_message = ""
self.result = "pending"
self._stdout_buffer = []
self._steps_buffer = []
def log_step(self, step_text: str) -> None:
"""Log a step for this worker.
Args:
step_text: Text describing the step
"""
try:
if self.manager:
self.manager.log_step(self.id, step_text)
else:
logger.info(f"[{self.id}] {step_text}")
except Exception as e:
logger.error(f"Error logging step for worker {self.id}: {e}")
def append_stdout(self, text: str) -> None:
"""Append text to stdout log.
Args:
text: Text to append
"""
try:
if self.manager:
self.manager.append_worker_stdout(self.id, text)
else:
self._stdout_buffer.append(text)
except Exception as e:
logger.error(f"Error appending stdout for worker {self.id}: {e}")
def get_stdout(self) -> str:
"""Get all stdout for this worker.
Returns:
Complete stdout text
"""
try:
if self.manager:
return self.manager.get_stdout(self.id)
else:
return "\n".join(self._stdout_buffer)
except Exception as e:
logger.error(f"Error getting stdout for worker {self.id}: {e}")
return ""
def get_steps(self) -> str:
"""Get all steps for this worker.
Returns:
Complete steps text
"""
try:
if self.manager:
return self.manager.get_steps(self.id)
else:
return "\n".join(self._steps_buffer)
except Exception as e:
logger.error(f"Error getting steps for worker {self.id}: {e}")
return ""
def update_progress(self, progress: str = "", details: str = "") -> None:
"""Update worker progress.
Args:
progress: Progress string (e.g., "50%")
details: Additional details
"""
self.progress = progress
self.details = details
        try:
            if self.manager:
                # WorkerManager.update_worker expects a numeric progress value,
                # so pass the textual progress as the step text instead of
                # positionally (which would raise a type error).
                self.manager.update_worker(self.id, current_step=progress, details=details)
except Exception as e:
logger.error(f"Error updating worker {self.id}: {e}")
def finish(self, result: str = "completed", message: str = "") -> None:
"""Mark worker as finished.
Args:
result: Result status ('completed', 'error', 'cancelled')
message: Result message/error details
"""
self.result = result
self.status = "finished"
self.error_message = message
try:
if self.manager:
# Flush and disable logging handler before marking finished
self.manager.disable_logging_for_worker(self.id)
# Then mark as finished in database
self.manager.finish_worker(self.id, result, message)
except Exception as e:
logger.error(f"Error finishing worker {self.id}: {e}")
class WorkerLoggingHandler(logging.StreamHandler):
"""Custom logging handler that captures logs for a worker."""
def __init__(self, worker_id: str, db: LocalLibraryDB,
manager: Optional['WorkerManager'] = None,
buffer_size: int = 50):
"""Initialize the handler.
Args:
worker_id: ID of the worker to capture logs for
db: Reference to LocalLibraryDB for storing logs
            manager: Optional WorkerManager used to persist captured logs
            buffer_size: Number of logs to buffer before flushing to DB
"""
super().__init__()
self.worker_id = worker_id
self.db = db
self.manager = manager
self.buffer_size = buffer_size
self.buffer = []
self._lock = Lock()
# Set a format that includes timestamp and level
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
self.setFormatter(formatter)
def emit(self, record):
"""Emit a log record."""
try:
# Try to format the record normally
try:
msg = self.format(record)
except (TypeError, ValueError):
# If formatting fails (e.g., %d format with non-int arg),
# build message manually without calling getMessage()
try:
# Try to format with args if possible
if record.args:
msg = record.msg % record.args
else:
msg = record.msg
except (TypeError, ValueError):
# If that fails too, just use the raw message string
msg = str(record.msg)
            # The formatter already adds this prefix when self.format() succeeds;
            # only prepend it when we had to fall back to the raw message.
            if record.levelname not in msg:
                timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
                msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
with self._lock:
self.buffer.append(msg)
# Flush to DB when buffer reaches size
if len(self.buffer) >= self.buffer_size:
self._flush()
except Exception:
self.handleError(record)
def _flush(self):
"""Flush buffered logs to database."""
if self.buffer:
log_text = '\n'.join(self.buffer)
try:
if self.manager:
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
else:
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
except Exception as e:
# If we can't write to DB, at least log it
log(f"Error flushing worker logs: {e}")
self.buffer = []
def flush(self):
"""Flush any buffered records."""
with self._lock:
self._flush()
super().flush()
def close(self):
"""Close the handler."""
self.flush()
super().close()
class WorkerManager:
"""Manages persistent worker tasks with auto-refresh capability."""
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
"""Initialize the worker manager.
Args:
library_root: Root directory for the local library database
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
"""
self.library_root = Path(library_root)
self.db = LocalLibraryDB(library_root)
self.auto_refresh_interval = auto_refresh_interval
self.refresh_callbacks: List[Callable] = []
self.refresh_thread: Optional[Thread] = None
self._stop_refresh = False
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str, str] = {}
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
"""Register a callback to be called on worker updates.
Args:
callback: Function that receives list of active workers
"""
with self._lock:
self.refresh_callbacks.append(callback)
def expire_running_workers(
self,
older_than_seconds: int = 300,
worker_id_prefix: Optional[str] = None,
reason: Optional[str] = None,
status: str = "error",
) -> int:
"""Mark stale running workers as finished.
Args:
older_than_seconds: Idle threshold before expiring.
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
reason: Error message if none already exists.
status: New status to apply.
Returns:
Count of workers updated.
"""
try:
return self.db.expire_running_workers(
older_than_seconds=older_than_seconds,
status=status,
reason=reason,
worker_id_prefix=worker_id_prefix,
)
except Exception as exc:
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
return 0
def remove_refresh_callback(self, callback: Callable) -> None:
"""Remove a refresh callback.
Args:
callback: The callback function to remove
"""
with self._lock:
if callback in self.refresh_callbacks:
self.refresh_callbacks.remove(callback)
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
"""Enable logging capture for a worker.
Creates a logging handler that captures all logs for this worker.
Args:
worker_id: ID of the worker to capture logs for
Returns:
The logging handler that was created, or None if there was an error
"""
try:
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
with self._lock:
self.worker_handlers[worker_id] = handler
# Add the handler to the root logger so it captures all logs
root_logger = logging.getLogger()
root_logger.addHandler(handler)
root_logger.setLevel(logging.DEBUG) # Capture all levels
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
return handler
except Exception as e:
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
return None
def disable_logging_for_worker(self, worker_id: str) -> None:
"""Disable logging capture for a worker and flush any pending logs.
Args:
worker_id: ID of the worker to stop capturing logs for
"""
try:
with self._lock:
handler = self.worker_handlers.pop(worker_id, None)
if handler:
# Flush and close the handler
handler.flush()
handler.close()
# Remove from root logger
root_logger = logging.getLogger()
root_logger.removeHandler(handler)
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
except Exception as e:
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", total_steps: int = 0,
pipe: Optional[str] = None) -> bool:
"""Start tracking a new worker.
Args:
worker_id: Unique identifier for the worker
worker_type: Type of worker (e.g., 'download', 'search', 'import')
title: Worker title/name
description: Worker description
total_steps: Total number of steps for progress tracking
pipe: Text of the originating pipe/prompt, if any
Returns:
True if worker was inserted successfully
"""
try:
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
if result > 0:
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
self._start_refresh_if_needed()
return True
return False
except Exception as e:
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
return False
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
details: str = "", error: str = "") -> bool:
"""Update worker progress and status.
Args:
worker_id: Unique identifier for the worker
progress: Progress percentage (0-100)
current_step: Current step description
details: Additional details
error: Error message if any
Returns:
True if update was successful
"""
try:
kwargs = {}
if progress > 0:
kwargs['progress'] = progress
if current_step:
kwargs['current_step'] = current_step
if details:
kwargs['description'] = details
if error:
kwargs['error_message'] = error
if kwargs:
kwargs['last_updated'] = datetime.now().isoformat()
if 'current_step' in kwargs and kwargs['current_step']:
self._worker_last_step[worker_id] = str(kwargs['current_step'])
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
return False
def finish_worker(self, worker_id: str, result: str = "completed",
error_msg: str = "", result_data: str = "") -> bool:
"""Mark a worker as finished.
Args:
worker_id: Unique identifier for the worker
result: Result status ('completed', 'error', 'cancelled')
error_msg: Error message if any
result_data: Result data as JSON string
Returns:
True if update was successful
"""
try:
kwargs = {
'status': result,
'completed_at': datetime.now().isoformat()
}
if error_msg:
kwargs['error_message'] = error_msg
if result_data:
kwargs['result_data'] = result_data
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
self._worker_last_step.pop(worker_id, None)
return success
except Exception as e:
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
return False
def get_active_workers(self) -> List[Dict[str, Any]]:
"""Get all active (running) workers.
Returns:
List of active worker dictionaries
"""
try:
return self.db.get_active_workers()
except Exception as e:
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
return []
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
"""Get all finished workers (completed, errored, or cancelled).
Args:
limit: Maximum number of workers to retrieve
Returns:
List of finished worker dictionaries
"""
try:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
return finished
except Exception as e:
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
return []
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
"""Get a specific worker's data.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker data or None if not found
"""
try:
return self.db.get_worker(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
return None
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
"""Fetch recorded worker timeline events."""
return self.db.get_worker_events(worker_id, limit)
def log_step(self, worker_id: str, step_text: str) -> bool:
"""Log a step to a worker's step history.
Args:
worker_id: Unique identifier for the worker
step_text: Step description to log
Returns:
True if successful
"""
try:
success = self.db.append_worker_steps(worker_id, step_text)
if success:
self._worker_last_step[worker_id] = step_text
return success
except Exception as e:
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
return False
def _get_last_step(self, worker_id: str) -> Optional[str]:
"""Return the most recent step description for a worker."""
return self._worker_last_step.get(worker_id)
def get_steps(self, worker_id: str) -> str:
"""Get step logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Steps text or empty string if not found
"""
try:
return self.db.get_worker_steps(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
return ''
def start_auto_refresh(self) -> None:
"""Start the auto-refresh thread for periodic worker updates."""
if self.auto_refresh_interval <= 0:
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
return
if self.refresh_thread and self.refresh_thread.is_alive():
logger.debug("[WorkerManager] Auto-refresh already running")
return
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
self._stop_refresh = False
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
def stop_auto_refresh(self) -> None:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
self._stop_refresh = True
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
def _start_refresh_if_needed(self) -> None:
"""Start auto-refresh if we have active workers and callbacks."""
active = self.get_active_workers()
if active and self.refresh_callbacks and not self._stop_refresh:
self.start_auto_refresh()
def _auto_refresh_loop(self) -> None:
"""Main auto-refresh loop that periodically queries and notifies."""
try:
while not self._stop_refresh:
time.sleep(self.auto_refresh_interval)
# Check if there are active workers
active = self.get_active_workers()
if not active:
# No more active workers, stop refreshing
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
break
# Call all registered callbacks with the active workers
with self._lock:
for callback in self.refresh_callbacks:
try:
callback(active)
except Exception as e:
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
except Exception as e:
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
finally:
logger.debug("[WorkerManager] Auto-refresh loop ended")
def cleanup_old_workers(self, days: int = 7) -> int:
"""Clean up completed/errored workers older than specified days.
Args:
days: Delete workers completed more than this many days ago
Returns:
Number of workers deleted
"""
try:
count = self.db.cleanup_old_workers(days)
if count > 0:
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
return count
except Exception as e:
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
return 0
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Append text to a worker's stdout log.
Args:
worker_id: Unique identifier for the worker
text: Text to append
channel: Logical channel (stdout, stderr, log, etc.)
Returns:
True if append was successful
"""
try:
step_label = self._get_last_step(worker_id)
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
return False
def get_stdout(self, worker_id: str) -> str:
"""Get stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker's stdout or empty string
"""
try:
return self.db.get_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
return ""
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Compatibility wrapper for append_stdout."""
return self.append_stdout(worker_id, text, channel=channel)
def clear_stdout(self, worker_id: str) -> bool:
"""Clear stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
True if clear was successful
"""
try:
return self.db.clear_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
return False
def close(self) -> None:
"""Close the worker manager and database connection."""
self.stop_auto_refresh()
self.db.close()
logger.info("[WorkerManager] Closed")

425
hydrus_health_check.py Normal file
View File

@@ -0,0 +1,425 @@
"""Hydrus API health check and initialization.
Provides startup health checks for Hydrus API availability and gracefully
disables Hydrus features if the API is unavailable.
"""
import logging
import sys
from helper.logger import log
from typing import Tuple, Optional, Dict, Any
from pathlib import Path
logger = logging.getLogger(__name__)
# Global state for Hydrus availability
_HYDRUS_AVAILABLE: Optional[bool] = None
_HYDRUS_UNAVAILABLE_REASON: Optional[str] = None
_HYDRUS_CHECK_COMPLETE = False
# Global state for Debrid availability
_DEBRID_AVAILABLE: Optional[bool] = None
_DEBRID_UNAVAILABLE_REASON: Optional[str] = None
_DEBRID_CHECK_COMPLETE = False
# Global state for MPV availability
_MPV_AVAILABLE: Optional[bool] = None
_MPV_UNAVAILABLE_REASON: Optional[str] = None
_MPV_CHECK_COMPLETE = False
def check_hydrus_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
"""Check if Hydrus API is available by pinging it.
Args:
config: Application configuration dictionary
Returns:
Tuple of (is_available: bool, reason: Optional[str])
- (True, None) if Hydrus is available
- (False, reason) if Hydrus is unavailable with reason
"""
try:
from helper.hydrus import is_available as _is_hydrus_available
logger.info("[Hydrus Health Check] Pinging Hydrus API...")
is_available, reason = _is_hydrus_available(config, use_cache=False)
if is_available:
logger.info("[Hydrus Health Check] ✅ Hydrus API is AVAILABLE")
return True, None
else:
reason_str = f": {reason}" if reason else ""
logger.warning(f"[Hydrus Health Check] ❌ Hydrus API is UNAVAILABLE{reason_str}")
return False, reason
except Exception as e:
error_msg = str(e)
logger.error(f"[Hydrus Health Check] ❌ Error checking Hydrus availability: {error_msg}")
return False, error_msg
def initialize_hydrus_health_check(config: Dict[str, Any]) -> None:
"""Initialize Hydrus health check at startup.
This should be called once at application startup to determine if Hydrus
features should be enabled or disabled.
Args:
config: Application configuration dictionary
"""
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON, _HYDRUS_CHECK_COMPLETE
logger.info("[Startup] Starting Hydrus health check...")
try:
is_available, reason = check_hydrus_availability(config)
_HYDRUS_AVAILABLE = is_available
_HYDRUS_UNAVAILABLE_REASON = reason
_HYDRUS_CHECK_COMPLETE = True
if is_available:
log("✅ Hydrus: ENABLED - All Hydrus features available", file=sys.stderr)
else:
log(f"⚠️ Hydrus: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
log("- Export functionality disabled", file=sys.stderr)
log("- Hydrus library features disabled", file=sys.stderr)
log("- Hydrus tag operations disabled", file=sys.stderr)
log("→ Local storage and All-Debrid features still available", file=sys.stderr)
except Exception as e:
logger.error(f"[Startup] Failed to initialize Hydrus health check: {e}", exc_info=True)
_HYDRUS_AVAILABLE = False
_HYDRUS_UNAVAILABLE_REASON = str(e)
_HYDRUS_CHECK_COMPLETE = True
log(f"⚠️ Hydrus: DISABLED - Error during health check: {e}", file=sys.stderr)
def check_debrid_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
"""Check if Debrid API is available.
Args:
config: Application configuration dictionary
Returns:
Tuple of (is_available: bool, reason: Optional[str])
- (True, None) if Debrid API is available
- (False, reason) if Debrid API is unavailable with reason
"""
try:
from helper.http_client import HTTPClient
logger.info("[Debrid Health Check] Pinging Debrid API at https://api.alldebrid.com/v4/ping...")
try:
# Use the public ping endpoint to check API availability
# This endpoint doesn't require authentication
with HTTPClient(timeout=10.0, verify_ssl=True) as client:
response = client.get('https://api.alldebrid.com/v4/ping')
logger.debug(f"[Debrid Health Check] Response status: {response.status_code}")
# Read response text first (handles gzip decompression)
try:
response_text = response.text
logger.debug(f"[Debrid Health Check] Response text: {response_text}")
except Exception as e:
logger.error(f"[Debrid Health Check] ❌ Failed to read response text: {e}")
return False, f"Failed to read response: {e}"
# Parse JSON
try:
result = response.json()
logger.debug(f"[Debrid Health Check] Response JSON: {result}")
except Exception as e:
logger.error(f"[Debrid Health Check] ❌ Failed to parse JSON: {e}")
logger.error(f"[Debrid Health Check] Response was: {response_text}")
return False, f"Failed to parse response: {e}"
# Validate response format
if result.get('status') == 'success' and result.get('data', {}).get('ping') == 'pong':
logger.info("[Debrid Health Check] ✅ Debrid API is AVAILABLE")
return True, None
else:
logger.warning(f"[Debrid Health Check] ❌ Debrid API returned unexpected response: {result}")
return False, "Invalid API response"
except Exception as e:
error_msg = str(e)
logger.warning(f"[Debrid Health Check] ❌ Debrid API error: {error_msg}")
import traceback
logger.debug(f"[Debrid Health Check] Traceback: {traceback.format_exc()}")
return False, error_msg
except Exception as e:
error_msg = str(e)
logger.error(f"[Debrid Health Check] ❌ Error checking Debrid availability: {error_msg}")
return False, error_msg
def initialize_debrid_health_check(config: Dict[str, Any]) -> None:
"""Initialize Debrid health check at startup.
This should be called once at application startup to determine if Debrid
features should be enabled or disabled.
Args:
config: Application configuration dictionary
"""
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON, _DEBRID_CHECK_COMPLETE
logger.info("[Startup] Starting Debrid health check...")
try:
is_available, reason = check_debrid_availability(config)
_DEBRID_AVAILABLE = is_available
_DEBRID_UNAVAILABLE_REASON = reason
_DEBRID_CHECK_COMPLETE = True
if is_available:
log("✅ Debrid: ENABLED - All Debrid features available", file=sys.stderr)
logger.info("[Startup] Debrid health check PASSED")
else:
log(f"⚠️ Debrid: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
log("- Debrid export disabled", file=sys.stderr)
log("- Debrid library features disabled", file=sys.stderr)
log("→ Local storage and Hydrus features still available", file=sys.stderr)
logger.warning(f"[Startup] Debrid health check FAILED: {reason}")
except Exception as e:
logger.error(f"[Startup] Failed to initialize Debrid health check: {e}", exc_info=True)
_DEBRID_AVAILABLE = False
_DEBRID_UNAVAILABLE_REASON = str(e)
_DEBRID_CHECK_COMPLETE = True
log(f"⚠️ Debrid: DISABLED - Error during health check: {e}", file=sys.stderr)
def check_mpv_availability() -> Tuple[bool, Optional[str]]:
"""Check if MPV is available (installed and runnable).
Returns:
Tuple of (is_available: bool, reason: Optional[str])
"""
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE
if _MPV_CHECK_COMPLETE and _MPV_AVAILABLE is not None:
return _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
import shutil
import subprocess
logger.info("[MPV Health Check] Checking for MPV executable...")
mpv_path = shutil.which("mpv")
if not mpv_path:
_MPV_AVAILABLE = False
_MPV_UNAVAILABLE_REASON = "Executable 'mpv' not found in PATH"
_MPV_CHECK_COMPLETE = True
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
return False, _MPV_UNAVAILABLE_REASON
# Try to get version to confirm it works
try:
result = subprocess.run(
[mpv_path, "--version"],
capture_output=True,
text=True,
timeout=2
)
if result.returncode == 0:
version_line = result.stdout.split('\n')[0]
_MPV_AVAILABLE = True
_MPV_UNAVAILABLE_REASON = None
_MPV_CHECK_COMPLETE = True
logger.info(f"[MPV Health Check] ✅ MPV is AVAILABLE ({version_line})")
return True, None
else:
_MPV_AVAILABLE = False
_MPV_UNAVAILABLE_REASON = f"MPV returned non-zero exit code: {result.returncode}"
_MPV_CHECK_COMPLETE = True
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
return False, _MPV_UNAVAILABLE_REASON
except Exception as e:
_MPV_AVAILABLE = False
_MPV_UNAVAILABLE_REASON = f"Error running MPV: {e}"
_MPV_CHECK_COMPLETE = True
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
return False, _MPV_UNAVAILABLE_REASON
def initialize_mpv_health_check() -> None:
"""Initialize MPV health check at startup.
This should be called once at application startup to determine if MPV
features should be enabled or disabled.
"""
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE
logger.info("[Startup] Starting MPV health check...")
try:
is_available, reason = check_mpv_availability()
_MPV_AVAILABLE = is_available
_MPV_UNAVAILABLE_REASON = reason
_MPV_CHECK_COMPLETE = True
if is_available:
log("✅ MPV: ENABLED - All MPV features available", file=sys.stderr)
logger.info("[Startup] MPV health check PASSED")
else:
log(f"⚠️ MPV: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
log("→ Hydrus features still available", file=sys.stderr)
logger.warning(f"[Startup] MPV health check FAILED: {reason}")
except Exception as e:
logger.error(f"[Startup] Failed to initialize MPV health check: {e}", exc_info=True)
_MPV_AVAILABLE = False
_MPV_UNAVAILABLE_REASON = str(e)
_MPV_CHECK_COMPLETE = True
log(f"⚠️ MPV: DISABLED - Error during health check: {e}", file=sys.stderr)
def is_hydrus_available() -> bool:
"""Check if Hydrus is available (from cached health check).
Returns:
True if Hydrus API is available, False otherwise
"""
return _HYDRUS_AVAILABLE is True
def get_hydrus_unavailable_reason() -> Optional[str]:
"""Get the reason why Hydrus is unavailable.
Returns:
String explaining why Hydrus is unavailable, or None if available
"""
return _HYDRUS_UNAVAILABLE_REASON if not is_hydrus_available() else None
def is_hydrus_check_complete() -> bool:
"""Check if the Hydrus health check has been completed.
Returns:
True if health check has run, False if still pending
"""
return _HYDRUS_CHECK_COMPLETE
def disable_hydrus_features() -> None:
"""Manually disable all Hydrus features (for testing/fallback).
This can be called if Hydrus connectivity is lost after startup.
"""
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
_HYDRUS_AVAILABLE = False
_HYDRUS_UNAVAILABLE_REASON = "Manually disabled or lost connection"
logger.warning("[Hydrus] Features manually disabled")
def enable_hydrus_features() -> None:
"""Manually enable Hydrus features (for testing/fallback).
This can be called if Hydrus connectivity is restored after startup.
"""
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
_HYDRUS_AVAILABLE = True
_HYDRUS_UNAVAILABLE_REASON = None
logger.info("[Hydrus] Features manually enabled")
def is_debrid_available() -> bool:
"""Check if Debrid is available (from cached health check).
Returns:
True if Debrid API is available, False otherwise
"""
return _DEBRID_AVAILABLE is True
def get_debrid_unavailable_reason() -> Optional[str]:
"""Get the reason why Debrid is unavailable.
Returns:
String explaining why Debrid is unavailable, or None if available
"""
return _DEBRID_UNAVAILABLE_REASON if not is_debrid_available() else None
def is_debrid_check_complete() -> bool:
"""Check if the Debrid health check has been completed.
Returns:
True if health check has run, False if still pending
"""
return _DEBRID_CHECK_COMPLETE
def disable_debrid_features() -> None:
"""Manually disable all Debrid features (for testing/fallback).
This can be called if Debrid connectivity is lost after startup.
"""
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON
_DEBRID_AVAILABLE = False
_DEBRID_UNAVAILABLE_REASON = "Manually disabled or lost connection"
logger.warning("[Debrid] Features manually disabled")
def enable_debrid_features() -> None:
"""Manually enable Debrid features (for testing/fallback).
This can be called if Debrid connectivity is restored after startup.
"""
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON
_DEBRID_AVAILABLE = True
_DEBRID_UNAVAILABLE_REASON = None
logger.info("[Debrid] Features manually enabled")
def is_mpv_available() -> bool:
"""Check if MPV is available (from cached health check).
Returns:
True if MPV is available, False otherwise
"""
return _MPV_AVAILABLE is True
def get_mpv_unavailable_reason() -> Optional[str]:
"""Get the reason why MPV is unavailable.
Returns:
String explaining why MPV is unavailable, or None if available
"""
return _MPV_UNAVAILABLE_REASON if not is_mpv_available() else None
def is_mpv_check_complete() -> bool:
"""Check if the MPV health check has been completed.
Returns:
True if health check has run, False if still pending
"""
return _MPV_CHECK_COMPLETE
def disable_mpv_features() -> None:
"""Manually disable all MPV features (for testing/fallback).
    This can be called if MPV becomes unavailable after startup.
"""
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
_MPV_AVAILABLE = False
_MPV_UNAVAILABLE_REASON = "Manually disabled or lost connection"
logger.warning("[MPV] Features manually disabled")
def enable_mpv_features() -> None:
"""Manually enable MPV features (for testing/fallback).
This can be called if MPV connectivity is restored after startup.
"""
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
_MPV_AVAILABLE = True
_MPV_UNAVAILABLE_REASON = None
logger.info("[MPV] Features manually enabled")

13
medeia_entry.py Normal file
View File

@@ -0,0 +1,13 @@
"""Entry point wrapper for Medeia-Macina CLI."""
import sys
from pathlib import Path
# Add the current directory to sys.path so we can import CLI
root_dir = Path(__file__).parent
if str(root_dir) not in sys.path:
sys.path.insert(0, str(root_dir))
from CLI import main
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,2 @@
"""Medeia-Macina package - Media management system."""
__version__ = "0.1.0"

View File

@@ -0,0 +1,13 @@
"""Entry point wrapper for Medeia-Macina CLI."""
import sys
from pathlib import Path
# Add the parent directory to sys.path so we can import CLI
root_dir = Path(__file__).parent.parent
if str(root_dir) not in sys.path:
sys.path.insert(0, str(root_dir))
from CLI import main
if __name__ == "__main__":
main()

3199
metadata.py Normal file

File diff suppressed because it is too large Load Diff

678
models.py Normal file
View File

@@ -0,0 +1,678 @@
"""Data models for the pipeline."""
import datetime
import hashlib
import json
import math
import os
import shutil
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple
@dataclass(slots=True)
class PipeObject:
"""Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline.
This is the single source of truth for all result data in the pipeline. It can represent:
- Tag extraction results (IMDb, MusicBrainz, OpenLibrary lookups)
- Remote metadata fetches
- File operations with metadata/tags and relationship tracking
- Search results
- Files with version relationships (king/alt/related)
Attributes:
source: Source of the object (e.g., 'imdb', 'musicbrainz', 'libgen', 'debrid', 'file', etc.)
identifier: Unique identifier from the source (e.g., IMDb ID, MBID, magnet hash, file hash)
tags: List of extracted or assigned tags
title: Human-readable title if applicable
source_url: URL where the object came from
duration: Duration in seconds if applicable
metadata: Full metadata dictionary from source
remote_metadata: Additional remote metadata
warnings: Any warnings or issues encountered
mpv_metadata: MPV-specific metadata if applicable
file_path: Path to the file if this object represents a file
file_hash: SHA-256 hash of the file for integrity and relationship tracking
king_hash: Hash of the primary/master version of this file (for alternates)
alt_hashes: List of hashes for alternate versions of this file
related_hashes: List of hashes for related files (e.g., screenshots, editions)
is_temp: If True, this is a temporary/intermediate artifact that may be cleaned up
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name', e.g., 'cmdlet:get-file')
parent_id: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
extra: Additional fields not covered above
"""
source: str
identifier: str
tags: List[str] = field(default_factory=list)
title: Optional[str] = None
source_url: Optional[str] = None
duration: Optional[float] = None
metadata: Dict[str, Any] = field(default_factory=dict)
remote_metadata: Optional[Dict[str, Any]] = None
warnings: List[str] = field(default_factory=list)
mpv_metadata: Optional[Dict[str, Any]] = None
file_path: Optional[str] = None
file_hash: Optional[str] = None
king_hash: Optional[str] = None
alt_hashes: List[str] = field(default_factory=list)
related_hashes: List[str] = field(default_factory=list)
is_temp: bool = False
action: Optional[str] = None
parent_id: Optional[str] = None
extra: Dict[str, Any] = field(default_factory=dict)
def register_as_king(self, file_hash: str) -> None:
"""Register this object as the king (primary) version of a file."""
self.king_hash = file_hash
def add_alternate(self, alt_hash: str) -> None:
"""Add an alternate version hash for this file."""
if alt_hash not in self.alt_hashes:
self.alt_hashes.append(alt_hash)
def add_related(self, related_hash: str) -> None:
"""Add a related file hash (e.g., screenshot, edition)."""
if related_hash not in self.related_hashes:
self.related_hashes.append(related_hash)
def get_relationships(self) -> Dict[str, Any]:
"""Get all relationships for this object."""
rels = {}
if self.king_hash:
rels["king"] = self.king_hash
if self.alt_hashes:
rels["alt"] = self.alt_hashes
if self.related_hashes:
rels["related"] = self.related_hashes
return rels
def to_dict(self) -> Dict[str, Any]:
"""Serialize to dictionary, excluding None and empty values."""
data: Dict[str, Any] = {
"source": self.source,
"tags": self.tags,
}
if self.identifier:
data["id"] = self.identifier
if self.title:
data["title"] = self.title
if self.source_url:
data["source_url"] = self.source_url
if self.duration is not None:
data["duration"] = self.duration
if self.metadata:
data["metadata"] = self.metadata
if self.remote_metadata is not None:
data["remote_metadata"] = self.remote_metadata
if self.mpv_metadata is not None:
data["mpv_metadata"] = self.mpv_metadata
if self.warnings:
data["warnings"] = self.warnings
if self.file_path:
data["file_path"] = self.file_path
if self.file_hash:
data["file_hash"] = self.file_hash
# Include pipeline chain tracking fields
if self.is_temp:
data["is_temp"] = self.is_temp
if self.action:
data["action"] = self.action
if self.parent_id:
data["parent_id"] = self.parent_id
# Include relationship data if present
rels = self.get_relationships()
if rels:
data["relationships"] = rels
data.update({k: v for k, v in self.extra.items() if v is not None})
return data
@property
def hash(self) -> str:
"""Compute SHA-256 hash from source and identifier."""
base = f"{self.source}:{self.identifier}"
return hashlib.sha256(base.encode('utf-8')).hexdigest()
# Backwards compatibility aliases
def as_dict(self) -> Dict[str, Any]:
"""Alias for to_dict() for backwards compatibility."""
return self.to_dict()
def to_serializable(self) -> Dict[str, Any]:
"""Alias for to_dict() for backwards compatibility."""
return self.to_dict()
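# Construction sketch for PipeObject; the source/identifier values are
# placeholders. to_dict() omits unset fields and includes any relationships.
def _example_pipe_object() -> Dict[str, Any]:
    obj = PipeObject(source="imdb", identifier="tt0111161", title="Example Title",
                     tags=["genre:drama"], action="cmdlet:get-file")
    obj.register_as_king("a" * 64)
    obj.add_related("b" * 64)
    return obj.to_dict()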
class FileRelationshipTracker:
"""Track relationships between files for sidecar creation.
Allows tagging files with their relationships to other files:
- king: The primary/master version of a file
- alt: Alternate versions of the same content
- related: Related files (e.g., screenshots of a book)
"""
def __init__(self) -> None:
self.relationships: Dict[str, Dict[str, Any]] = {}
def register_king(self, file_path: str, file_hash: str) -> None:
"""Register a file as the king (primary) version."""
if file_path not in self.relationships:
self.relationships[file_path] = {}
self.relationships[file_path]["king"] = file_hash
def add_alt(self, file_path: str, alt_hash: str) -> None:
"""Add an alternate version of a file."""
if file_path not in self.relationships:
self.relationships[file_path] = {}
if "alt" not in self.relationships[file_path]:
self.relationships[file_path]["alt"] = []
if alt_hash not in self.relationships[file_path]["alt"]:
self.relationships[file_path]["alt"].append(alt_hash)
def add_related(self, file_path: str, related_hash: str) -> None:
"""Add a related file."""
if file_path not in self.relationships:
self.relationships[file_path] = {}
if "related" not in self.relationships[file_path]:
self.relationships[file_path]["related"] = []
if related_hash not in self.relationships[file_path]["related"]:
self.relationships[file_path]["related"].append(related_hash)
def get_relationships(self, file_path: str) -> Optional[Dict[str, Any]]:
"""Get relationships for a file."""
return self.relationships.get(file_path)
def link_files(self, primary_path: str, king_hash: str, *alt_paths: str) -> None:
"""Link files together with primary as king and others as alternates.
Args:
primary_path: Path to the primary file (will be marked as 'king')
king_hash: Hash of the primary file
alt_paths: Paths to alternate versions (will be marked as 'alt')
"""
self.register_king(primary_path, king_hash)
for alt_path in alt_paths:
try:
alt_hash = _get_file_hash(alt_path)
self.add_alt(primary_path, alt_hash)
except Exception as e:
import sys
print(f"Error hashing {alt_path}: {e}", file=sys.stderr)
def _get_file_hash(filepath: str) -> str:
"""Calculate SHA256 hash of a file."""
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
# ============= Download Module Classes =============
class DownloadError(RuntimeError):
"""Raised when the download or Hydrus import fails."""
@dataclass(slots=True)
class DownloadOptions:
"""Configuration for downloading media.
Use the add-file cmdlet separately for Hydrus import.
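Example (illustrative; the URL and paths are placeholders):
    opts = DownloadOptions(
        url="https://example.com/watch?v=abc",
        mode="audio",
        output_dir=Path("downloads"),
        playlist_items="1-3",
        no_playlist=False,
    )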
"""
url: str
mode: str # "audio" or "video"
output_dir: Path
cookies_path: Optional[Path] = None
ytdl_format: Optional[str] = None
extra_tags: Optional[List[str]] = None
debug_log: Optional[Path] = None
native_progress: bool = False
clip_sections: Optional[str] = None
playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8")
no_playlist: bool = False # If True, pass --no-playlist to yt-dlp
class SendFunc(Protocol):
"""Protocol for event sender function."""
def __call__(self, event: str, **payload: Any) -> None:
...
@dataclass(slots=True)
class DownloadMediaResult:
"""Result of a successful media download."""
path: Path
info: Dict[str, Any]
tags: List[str]
source_url: Optional[str]
hash_value: Optional[str] = None
@dataclass(slots=True)
class DebugLogger:
"""Logs events to a JSON debug file for troubleshooting downloads."""
path: Path
file: Optional[TextIO] = None
session_started: bool = False
def ensure_open(self) -> None:
"""Open the debug log file if not already open."""
if self.file is not None:
return
try:
parent = self.path.parent
if parent and not parent.exists():
parent.mkdir(parents=True, exist_ok=True)
self.file = self.path.open("a", encoding="utf-8")
except OSError as exc: # pragma: no cover - surfaces to stderr
print(f"Failed to open debug log {self.path}: {exc}", file=sys.stderr)
self.file = None
return
self._write_session_header()
def _write_session_header(self) -> None:
"""Write session start marker to log."""
if self.session_started:
return
self.session_started = True
self.write_record("session-start", {"pid": os.getpid(), "exe": sys.executable})
def write_raw(self, text: str) -> None:
"""Write raw text to debug log."""
self.ensure_open()
if self.file is None:
return
self.file.write(text + "\n")
self.file.flush()
def write_record(self, event: str, payload: Optional[Dict[str, Any]] = None) -> None:
"""Write a structured event record to debug log."""
record = {
"timestamp": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
"event": event,
"payload": payload,
}
self.write_raw(json.dumps(_sanitise_for_json(record), ensure_ascii=False))
def close(self) -> None:
"""Close the debug log file."""
if self.file is None:
return
try:
self.file.close()
finally:
self.file = None
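# Illustrative usage sketch (the path is a placeholder): each record is written as one
# JSON line, after a session-start header is emitted on first open.
#
#   dbg = DebugLogger(path=Path("yt-dlp-debug.log"))
#   dbg.write_record("download-start", {"url": "https://example.com/video"})
#   dbg.close()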
def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[int]] = None) -> Any:
"""Best-effort conversion to JSON-serialisable types without raising on cycles."""
import math
from dataclasses import asdict, is_dataclass
if value is None or isinstance(value, (str, bool)):
return value
if isinstance(value, (int, float)):
if isinstance(value, float) and not math.isfinite(value):
return repr(value)
return value
if isinstance(value, Path):
return str(value)
if isinstance(value, bytes):
try:
return value.decode()
except Exception:
return value.hex()
if max_depth <= 0:
return repr(value)
if _seen is None:
_seen = set()
obj_id = id(value)
if obj_id in _seen:
return "<circular>"
_seen.add(obj_id)
try:
if isinstance(value, dict):
return {
str(key): _sanitise_for_json(val, max_depth=max_depth - 1, _seen=_seen)
for key, val in value.items()
}
if isinstance(value, (list, tuple, set)):
iterable = value if not isinstance(value, set) else list(value)
return [
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen)
for item in iterable
]
if is_dataclass(value) and not isinstance(value, type):
return _sanitise_for_json(asdict(value), max_depth=max_depth - 1, _seen=_seen)
finally:
_seen.discard(obj_id)
return repr(value)
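# Illustrative behaviour (made-up values):
#   _sanitise_for_json({"path": Path("/tmp/x"), "raw": b"\xff", "nan": float("nan")})
#   -> {"path": "/tmp/x", "raw": "ff", "nan": "nan"}
# Circular references become "<circular>" instead of raising.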
# ============================================================================
# PROGRESS BAR CLASS
# ============================================================================
class ProgressBar:
"""Formats download progress with visual bar, speed, ETA, and file size."""
def __init__(self, width: Optional[int] = None):
"""Initialize progress bar with optional custom width.
Args:
width: Terminal width, defaults to auto-detect.
"""
if width is None:
width = shutil.get_terminal_size((80, 20))[0]
self.width = max(40, width) # Minimum 40 chars for readability
def format_bytes(self, bytes_val: Optional[float]) -> str:
"""Format bytes to human-readable size.
Args:
bytes_val: Number of bytes or None.
Returns:
Formatted string (e.g., "123.4 MB", "1.2 GB").
"""
if bytes_val is None or bytes_val <= 0:
return "?.? B"
for unit in ("B", "KB", "MB", "GB", "TB"):
if bytes_val < 1024:
return f"{bytes_val:.1f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.1f} PB"
def format_speed(self, speed_str: Optional[str]) -> str:
"""Format download speed.
Args:
speed_str: Speed string from yt-dlp (e.g., "1.23MiB/s").
Returns:
Formatted speed string or "?.? KB/s".
"""
if not speed_str or speed_str.strip() == "":
return "?.? KB/s"
return speed_str.strip()
def format_eta(self, eta_str: Optional[str]) -> str:
"""Format estimated time remaining.
Args:
eta_str: ETA string from yt-dlp (e.g., "00:12:34").
Returns:
Formatted ETA string or "?:?:?".
"""
if not eta_str or eta_str.strip() == "":
return "?:?:?"
return eta_str.strip()
def format_percent(self, percent_str: Optional[str]) -> float:
"""Extract percent as float.
Args:
percent_str: Percent string from yt-dlp (e.g., "45.2%").
Returns:
Float 0-100 or 0 if invalid.
"""
if not percent_str:
return 0.0
try:
return float(percent_str.replace("%", "").strip())
except ValueError:
return 0.0
def build_bar(self, percent: float, width: int = 30) -> str:
"""Build ASCII progress bar.
Args:
percent: Completion percentage (0-100).
width: Bar width in characters.
Returns:
Progress bar string (e.g., "[████████░░░░░░░░░░░░░░░░░░]").
"""
percent = max(0, min(100, percent)) # Clamp to 0-100
filled = int(percent * width / 100)
empty = width - filled
# Use box-drawing characters for nice appearance
bar = "█" * filled + "░" * empty
return f"[{bar}]"
def format_progress(
self,
percent_str: Optional[str] = None,
downloaded: Optional[int] = None,
total: Optional[int] = None,
speed_str: Optional[str] = None,
eta_str: Optional[str] = None,
) -> str:
"""Format complete progress line.
Args:
percent_str: Percent string (e.g., "45.2%").
downloaded: Downloaded bytes.
total: Total bytes.
speed_str: Speed string (e.g., "1.23MiB/s").
eta_str: ETA string (e.g., "00:12:34").
Returns:
Formatted progress string.
"""
percent = self.format_percent(percent_str)
bar = self.build_bar(percent)
# Format sizes
if downloaded is not None and total is not None and total > 0:
size_str = f"{self.format_bytes(downloaded)} / {self.format_bytes(total)}"
elif total is not None and total > 0:
size_str = f"/ {self.format_bytes(total)}"
elif downloaded is not None and downloaded > 0:
size_str = f"{self.format_bytes(downloaded)} downloaded"
else:
size_str = ""
speed = self.format_speed(speed_str)
eta = self.format_eta(eta_str)
# Build complete line
# Format: [████░░░░] 45.2% | 125.5 MB / 278.3 MB | 1.23 MB/s | ETA 00:12:34
parts = [
bar,
f"{percent:5.1f}%",
]
if size_str:
parts.append(f"| {size_str}")
parts.append(f"| {speed}")
parts.append(f"| ETA {eta}")
return " ".join(parts)
def format_summary(
self,
total: Optional[int] = None,
speed_str: Optional[str] = None,
elapsed_str: Optional[str] = None,
) -> str:
"""Format completion summary.
Args:
total: Total bytes downloaded.
speed_str: Average speed.
elapsed_str: Total time elapsed.
Returns:
Summary string.
"""
parts = ["✓ Download complete"]
if total is not None and total > 0:
parts.append(f"| {self.format_bytes(total)}")
if speed_str:
parts.append(f"| {speed_str.strip()}")
if elapsed_str:
parts.append(f"| {elapsed_str.strip()}")
return " ".join(parts)
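# Illustrative usage sketch (numbers are made up):
#   bar = ProgressBar()
#   line = bar.format_progress(percent_str="45.2%", downloaded=125_500_000,
#                              total=278_300_000, speed_str="1.23MiB/s", eta_str="00:12:34")
#   sys.stdout.write("\r" + line)  # overwrite the current line on each progress event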
# ============================================================================
# PIPELINE EXECUTION CONTEXT
# Consolidated from pipeline_context.py
# ============================================================================
# Note: Pipeline functions and state variables moved to pipeline.py
class PipelineStageContext:
"""Context information for the current pipeline stage."""
def __init__(self, stage_index: int, total_stages: int):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.emits: List[Any] = []
def emit(self, obj: Any) -> None:
"""Emit an object to the next pipeline stage."""
self.emits.append(obj)
def __repr__(self) -> str:
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage})"
# ============================================================================
# RESULT TABLE CLASSES
# Consolidated from result_table.py
# ============================================================================
@dataclass
class InputOption:
"""Represents an interactive input option (cmdlet argument) in a table.
Allows users to select options that translate to cmdlet arguments,
enabling interactive configuration right from the result table.
Example:
# Create an option for location selection
location_opt = InputOption(
"location",
type="enum",
choices=["local", "hydrus", "0x0"],
description="Download destination"
)
# Use in result table
table.add_input_option(location_opt)
selected = table.select_option("location") # Returns user choice
"""
name: str
"""Option name (maps to cmdlet argument)"""
type: str = "string"
"""Option type: 'string', 'enum', 'flag', 'integer'"""
choices: List[str] = field(default_factory=list)
"""Valid choices for enum type"""
default: Optional[str] = None
"""Default value if not specified"""
description: str = ""
"""Description of what this option does"""
validator: Optional[Callable[[str], bool]] = None
"""Optional validator function: takes value, returns True if valid"""
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
"name": self.name,
"type": self.type,
"choices": self.choices if self.choices else None,
"default": self.default,
"description": self.description,
}
@dataclass
class TUIResultCard:
"""Represents a result as a UI card with title, metadata, and actions.
Used in hub-ui and TUI contexts to render individual search results
as grouped components with visual structure.
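Example (illustrative; values are placeholders):
    card = TUIResultCard(
        title="Example Track",
        subtitle="Artist - Album",
        metadata={"Source": "local"},
        tags=["music"],
        duration="3:35",
    )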
"""
title: str
subtitle: Optional[str] = None
metadata: Optional[Dict[str, str]] = None
media_kind: Optional[str] = None
tags: Optional[List[str]] = None
file_hash: Optional[str] = None
file_size: Optional[str] = None
duration: Optional[str] = None
def __post_init__(self):
"""Initialize default values."""
if self.metadata is None:
self.metadata = {}
if self.tags is None:
self.tags = []
@dataclass
class ResultColumn:
"""Represents a single column in a result table."""
name: str
value: str
width: Optional[int] = None
def __str__(self) -> str:
"""String representation of the column."""
return f"{self.name}: {self.value}"
def to_dict(self) -> Dict[str, str]:
"""Convert to dictionary."""
return {"name": self.name, "value": self.value}
@dataclass
class ResultRow:
"""Represents a single row in a result table."""
columns: List[ResultColumn] = field(default_factory=list)
def add_column(self, name: str, value: Any) -> None:
"""Add a column to this row."""
str_value = str(value) if value is not None else ""
self.columns.append(ResultColumn(name, str_value))
def get_column(self, name: str) -> Optional[str]:
"""Get column value by name."""
for col in self.columns:
if col.name.lower() == name.lower():
return col.value
return None
def to_dict(self) -> List[Dict[str, str]]:
"""Convert to list of column dicts."""
return [col.to_dict() for col in self.columns]
def to_list(self) -> List[tuple[str, str]]:
"""Convert to list of (name, value) tuples."""
return [(col.name, col.value) for col in self.columns]
def __str__(self) -> str:
"""String representation of the row."""
return " | ".join(str(col) for col in self.columns)
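# Illustrative usage sketch (values are made up):
#   row = ResultRow()
#   row.add_column("Title", "Example Song")
#   row.add_column("Duration", 215)
#   row.get_column("title")  # -> "Example Song" (lookup is case-insensitive)
#   row.to_list()            # -> [("Title", "Example Song"), ("Duration", "215")]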

679
pipeline.py Normal file
View File

@@ -0,0 +1,679 @@
"""Pipeline execution context and state management for cmdlets.
This module provides functions for managing pipeline state, allowing cmdlets to
emit results and control printing behavior within a piped execution context.
Key Concepts:
- Pipeline stages are chained command invocations
- Each stage receives input items and emits output items
- Printing behavior is controlled based on pipeline position
- Stage context tracks whether this is the last stage (affects output verbosity)
PowerShell-like piping model:
- Each stage processes items individually
- Stage calls emit() for each output item
- Output items become input for next stage
- Batch commands receive all items at once (special case)
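Illustrative sketch of how a runner might drive one stage (not the actual runner;
values are placeholders):
    ctx = PipelineStageContext(stage_index=0, total_stages=2)
    set_stage_context(ctx)
    emit({"path": "a.mp3"})   # stage 1 emits one item
    next_input = ctx.emits    # the runner feeds this to stage 2
    set_stage_context(None)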
"""
from __future__ import annotations
import sys
from typing import Any, Dict, List, Optional, Sequence
from models import PipelineStageContext
from helper.logger import log
# ============================================================================
# PIPELINE GLOBALS (maintained for backward compatibility)
# ============================================================================
# Current pipeline context (thread-local in real world, global here for simplicity)
_CURRENT_CONTEXT: Optional[PipelineStageContext] = None
# Active execution state
_PIPE_EMITS: List[Any] = []
_PIPE_ACTIVE: bool = False
_PIPE_IS_LAST: bool = False
# Ephemeral handoff for direct pipelines (e.g., URL --screen-shot | ...)
_LAST_PIPELINE_CAPTURE: Optional[Any] = None
# Remember last search query to support refreshing results after pipeline actions
_LAST_SEARCH_QUERY: Optional[str] = None
# Track whether the last pipeline execution already refreshed and displayed results
_PIPELINE_REFRESHED: bool = False
# Cache the last pipeline outputs so non-interactive callers can inspect results
_PIPELINE_LAST_ITEMS: List[Any] = []
# Store the last result table for @ selection syntax (e.g., @2, @2-5, @{1,3,5})
_LAST_RESULT_TABLE: Optional[Any] = None
_LAST_RESULT_ITEMS: List[Any] = []
# History of result tables for @.. navigation (LIFO stack, max 20 tables)
_RESULT_TABLE_HISTORY: List[tuple[Optional[Any], List[Any]]] = []
_MAX_RESULT_TABLE_HISTORY = 20
# Current stage table for @N expansion (separate from history)
# Used to track the ResultTable with source_command + row_selection_args from current pipeline stage
# This is set by cmdlets that display tabular results (e.g., download-data showing formats)
# and used by CLI to expand @N into full commands like "download-data URL -item 2"
_CURRENT_STAGE_TABLE: Optional[Any] = None
# Items displayed by non-selectable commands (get-tag, delete-tag, etc.)
# These are available for @N selection but NOT saved to history
_DISPLAY_ITEMS: List[Any] = []
# Table for display-only commands (overlay)
# Used when a command wants to show a specific table formatting but not affect history
_DISPLAY_TABLE: Optional[Any] = None
# Track the indices the user selected via @ syntax for the current invocation
_PIPELINE_LAST_SELECTION: List[int] = []
# Track the currently executing command/pipeline string for worker attribution
_PIPELINE_COMMAND_TEXT: str = ""
# Shared scratchpad for cmdlets/funacts to stash structured data between stages
_PIPELINE_VALUES: Dict[str, Any] = {}
_PIPELINE_MISSING = object()
# Global callback to notify UI when library content changes
_UI_LIBRARY_REFRESH_CALLBACK: Optional[Any] = None
# ============================================================================
# PUBLIC API
# ============================================================================
def set_stage_context(context: Optional[PipelineStageContext]) -> None:
"""Internal: Set the current pipeline stage context."""
global _CURRENT_CONTEXT
_CURRENT_CONTEXT = context
def get_stage_context() -> Optional[PipelineStageContext]:
"""Get the current pipeline stage context."""
return _CURRENT_CONTEXT
def emit(obj: Any) -> None:
"""Emit an object to the current pipeline stage output.
Call this from a cmdlet to pass data to the next pipeline stage.
If not in a pipeline context, this is a no-op.
Args:
obj: Any object to emit downstream
Example:
```python
def _run(item, args, config):
result = process(item)
if result:
emit(result) # Pass to next stage
return 0
```
"""
# Try new context-based approach first
if _CURRENT_CONTEXT is not None:
import logging
logger = logging.getLogger(__name__)
logger.debug(f"[EMIT] Context-based: appending to _CURRENT_CONTEXT.emits. obj={obj}")
_CURRENT_CONTEXT.emit(obj)
return
# Fallback to legacy global approach (for backward compatibility)
try:
import logging
logger = logging.getLogger(__name__)
logger.debug(f"[EMIT] Legacy: appending to _PIPE_EMITS. obj type={type(obj).__name__}, _PIPE_EMITS len before={len(_PIPE_EMITS)}")
_PIPE_EMITS.append(obj)
logger.debug(f"[EMIT] Legacy: _PIPE_EMITS len after={len(_PIPE_EMITS)}")
except Exception as e:
import logging
logger = logging.getLogger(__name__)
logger.error(f"[EMIT] Error appending to _PIPE_EMITS: {e}", exc_info=True)
pass
def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
"""Print only if this is not a quiet mid-pipeline stage.
- Always allow errors printed to stderr by callers (they pass file=sys.stderr).
- For normal info messages, this suppresses printing for intermediate pipeline stages.
- Use this instead of log() in cmdlets when you want stage-aware output.
Args:
*args: Arguments to print (same as built-in print)
file: Output stream (default: stdout)
**kwargs: Keyword arguments for print
Example:
```python
# Always shows errors
print_if_visible("[error] Something failed", file=sys.stderr)
# Only shows in non-piped context or as final stage
print_if_visible(f"Processed {count} items")
```
"""
try:
# Print if: not in a pipeline OR this is the last stage
should_print = (not _PIPE_ACTIVE) or _PIPE_IS_LAST
# Always print to stderr regardless
if file is not None:
should_print = True
if should_print:
log(*args, **kwargs) if file is None else log(*args, file=file, **kwargs)
except Exception:
pass
def store_value(key: str, value: Any) -> None:
"""Store a value to pass to later pipeline stages.
Values are stored in a shared dictionary keyed by normalized lowercase strings.
This allows one stage to prepare data for the next stage without intermediate output.
Args:
key: Variable name (normalized to lowercase, non-empty)
value: Any Python object to store
"""
if not isinstance(key, str):
return
text = key.strip().lower()
if not text:
return
try:
_PIPELINE_VALUES[text] = value
except Exception:
pass
def load_value(key: str, default: Any = None) -> Any:
"""Retrieve a value stored by an earlier pipeline stage.
Supports dotted path notation for nested access (e.g., "metadata.tags" or "items.0").
Args:
key: Variable name or dotted path (e.g., "my_var", "metadata.title", "list.0")
default: Value to return if key not found or access fails
Returns:
The stored value, or default if not found
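Example (illustrative):
    store_value("metadata", {"title": "Example", "tags": ["music"]})
    load_value("metadata.title")   # -> "Example"
    load_value("metadata.tags.0")  # -> "music"
    load_value("missing", "n/a")   # -> "n/a"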
"""
if not isinstance(key, str):
return default
text = key.strip()
if not text:
return default
parts = [segment.strip() for segment in text.split('.') if segment.strip()]
if not parts:
return default
root_key = parts[0].lower()
container = _PIPELINE_VALUES.get(root_key, _PIPELINE_MISSING)
if container is _PIPELINE_MISSING:
return default
if len(parts) == 1:
return container
current: Any = container
for fragment in parts[1:]:
if isinstance(current, dict):
fragment_lower = fragment.lower()
if fragment in current:
current = current[fragment]
continue
match = _PIPELINE_MISSING
for key_name, value in current.items():
if isinstance(key_name, str) and key_name.lower() == fragment_lower:
match = value
break
if match is _PIPELINE_MISSING:
return default
current = match
continue
if isinstance(current, (list, tuple)):
if fragment.isdigit():
try:
idx = int(fragment)
except ValueError:
return default
if 0 <= idx < len(current):
current = current[idx]
continue
return default
if hasattr(current, fragment):
try:
current = getattr(current, fragment)
continue
except Exception:
return default
return default
return current
def reset() -> None:
"""Reset all pipeline state. Called between pipeline executions."""
global _PIPE_EMITS, _PIPE_ACTIVE, _PIPE_IS_LAST, _PIPELINE_VALUES
global _LAST_PIPELINE_CAPTURE, _PIPELINE_REFRESHED, _PIPELINE_LAST_ITEMS
global _PIPELINE_COMMAND_TEXT
_PIPE_EMITS = []
_PIPE_ACTIVE = False
_PIPE_IS_LAST = False
_LAST_PIPELINE_CAPTURE = None
_PIPELINE_REFRESHED = False
_PIPELINE_LAST_ITEMS = []
_PIPELINE_VALUES = {}
_PIPELINE_COMMAND_TEXT = ""
def get_emitted_items() -> List[Any]:
"""Get a copy of all items emitted by the current pipeline stage."""
return list(_PIPE_EMITS)
def clear_emits() -> None:
"""Clear the emitted items list (called between stages)."""
global _PIPE_EMITS
_PIPE_EMITS = []
def set_last_selection(indices: Sequence[int]) -> None:
"""Record the indices selected via @ syntax for the next cmdlet.
Args:
indices: Iterable of 0-based indices captured from the REPL parser
"""
global _PIPELINE_LAST_SELECTION
_PIPELINE_LAST_SELECTION = list(indices or [])
def get_last_selection() -> List[int]:
"""Return the indices selected via @ syntax for the current invocation."""
return list(_PIPELINE_LAST_SELECTION)
def clear_last_selection() -> None:
"""Clear the cached selection indices after a cmdlet finishes."""
global _PIPELINE_LAST_SELECTION
_PIPELINE_LAST_SELECTION = []
def set_current_command_text(command_text: Optional[str]) -> None:
"""Record the raw pipeline/command text for downstream consumers."""
global _PIPELINE_COMMAND_TEXT
_PIPELINE_COMMAND_TEXT = (command_text or "").strip()
def get_current_command_text(default: str = "") -> str:
"""Return the last recorded command/pipeline text."""
text = _PIPELINE_COMMAND_TEXT.strip()
return text if text else default
def clear_current_command_text() -> None:
"""Clear the cached command text after execution completes."""
global _PIPELINE_COMMAND_TEXT
_PIPELINE_COMMAND_TEXT = ""
def set_active(active: bool) -> None:
"""Internal: Set whether we're in a pipeline context."""
global _PIPE_ACTIVE
_PIPE_ACTIVE = active
def set_last_stage(is_last: bool) -> None:
"""Internal: Set whether this is the last stage of the pipeline."""
global _PIPE_IS_LAST
_PIPE_IS_LAST = is_last
def set_search_query(query: Optional[str]) -> None:
"""Internal: Set the last search query for refresh purposes."""
global _LAST_SEARCH_QUERY
_LAST_SEARCH_QUERY = query
def get_search_query() -> Optional[str]:
"""Get the last search query."""
return _LAST_SEARCH_QUERY
def set_pipeline_refreshed(refreshed: bool) -> None:
"""Internal: Track whether the pipeline already refreshed results."""
global _PIPELINE_REFRESHED
_PIPELINE_REFRESHED = refreshed
def was_pipeline_refreshed() -> bool:
"""Check if the pipeline already refreshed results."""
return _PIPELINE_REFRESHED
def set_last_items(items: list) -> None:
"""Internal: Cache the last pipeline outputs."""
global _PIPELINE_LAST_ITEMS
_PIPELINE_LAST_ITEMS = list(items) if items else []
def get_last_items() -> List[Any]:
"""Get the last pipeline outputs."""
return list(_PIPELINE_LAST_ITEMS)
def set_last_capture(obj: Any) -> None:
"""Internal: Store ephemeral handoff for direct pipelines."""
global _LAST_PIPELINE_CAPTURE
_LAST_PIPELINE_CAPTURE = obj
def get_last_capture() -> Optional[Any]:
"""Get ephemeral pipeline handoff (e.g., URL --screen-shot | ...)."""
return _LAST_PIPELINE_CAPTURE
def set_ui_library_refresh_callback(callback: Any) -> None:
"""Set a callback to be called when library content is updated.
The callback will be called with:
callback(library_filter: str = 'local')
Args:
callback: A callable that accepts optional library_filter parameter
Example:
def my_refresh_callback(library_filter='local'):
print(f"Refresh library: {library_filter}")
set_ui_library_refresh_callback(my_refresh_callback)
"""
global _UI_LIBRARY_REFRESH_CALLBACK
_UI_LIBRARY_REFRESH_CALLBACK = callback
def get_ui_library_refresh_callback() -> Optional[Any]:
"""Get the current library refresh callback."""
return _UI_LIBRARY_REFRESH_CALLBACK
def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
"""Trigger a library refresh in the UI if callback is registered.
This should be called from cmdlets/funacts after content is added to library.
Args:
library_filter: Which library to refresh ('local', 'hydrus', etc.)
"""
callback = get_ui_library_refresh_callback()
if callback:
try:
callback(library_filter)
except Exception as e:
print(f"[trigger_ui_library_refresh] Error calling refresh callback: {e}", file=sys.stderr)
def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
"""Store the last result table and items for @ selection syntax.
This should be called after displaying a result table, so users can reference
rows with @2, @2-5, @{1,3,5} syntax in subsequent commands.
Also maintains a history stack for @.. navigation (restore previous result table).
Only selectable commands (search-file, download-data) should call this to create history.
For action commands (delete-tag, add-tag, etc.), use set_last_result_table_preserve_history() instead.
Args:
result_table: The ResultTable object that was displayed (or None)
items: List of items that populated the table (optional)
"""
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _RESULT_TABLE_HISTORY, _DISPLAY_ITEMS, _DISPLAY_TABLE
# Push current table to history before replacing
if _LAST_RESULT_TABLE is not None:
_RESULT_TABLE_HISTORY.append((_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS.copy()))
# Keep history size limited
if len(_RESULT_TABLE_HISTORY) > _MAX_RESULT_TABLE_HISTORY:
_RESULT_TABLE_HISTORY.pop(0)
# Set new current table and clear any display items/table
_DISPLAY_ITEMS = []
_DISPLAY_TABLE = None
_LAST_RESULT_TABLE = result_table
_LAST_RESULT_ITEMS = items or []
def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
"""Set a result table as an overlay (display only, no history).
Used for commands like get-tag that want to show a formatted table but
should be treated as a transient view (closing it returns to previous table).
Args:
result_table: The ResultTable object to display
items: List of items for @N selection
"""
global _DISPLAY_ITEMS, _DISPLAY_TABLE
_DISPLAY_TABLE = result_table
_DISPLAY_ITEMS = items or []
def set_last_result_table_preserve_history(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
"""Update the last result table WITHOUT adding to history.
Used for action commands (delete-tag, add-tag, etc.) that modify data but shouldn't
create history entries. This allows @.. to navigate search results, not undo stacks.
Args:
result_table: The ResultTable object that was displayed (or None)
items: List of items that populated the table (optional)
"""
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS
# Update current table WITHOUT pushing to history
_LAST_RESULT_TABLE = result_table
_LAST_RESULT_ITEMS = items or []
def set_last_result_items_only(items: Optional[List[Any]]) -> None:
"""Store items for @N selection WITHOUT affecting history or saved search data.
Used for display-only commands (get-tag, get-url, etc.) and action commands
(delete-tag, add-tag, etc.) that emit results but shouldn't affect history.
These items are available for @1, @2, etc. selection in the next command,
but are NOT saved to history. This preserves search context for @.. navigation.
Args:
items: List of items to select from
"""
global _DISPLAY_ITEMS, _DISPLAY_TABLE
# Store items for immediate @N selection, but DON'T modify _LAST_RESULT_ITEMS
# This ensures history contains original search data, not display transformations
_DISPLAY_ITEMS = items or []
# Clear display table since we're setting items only (CLI will generate table if needed)
_DISPLAY_TABLE = None
def restore_previous_result_table() -> bool:
"""Restore the previous result table from history (for @.. navigation).
Returns:
True if a previous table was restored, False if history is empty
"""
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _RESULT_TABLE_HISTORY, _DISPLAY_ITEMS, _DISPLAY_TABLE
# If we have an active overlay (display items/table), clear it to "go back" to the underlying table
if _DISPLAY_ITEMS or _DISPLAY_TABLE:
_DISPLAY_ITEMS = []
_DISPLAY_TABLE = None
return True
if not _RESULT_TABLE_HISTORY:
return False
# Pop from history and restore
_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS = _RESULT_TABLE_HISTORY.pop()
# Clear display items so get_last_result_items() falls back to restored items
_DISPLAY_ITEMS = []
_DISPLAY_TABLE = None
return True
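# Illustrative sketch of the history/overlay interplay (tables and items are placeholders):
#   set_last_result_table(search_table, search_items)    # selectable result (e.g. search-file)
#   set_last_result_table_overlay(tag_table, tag_items)  # transient view (e.g. get-tag)
#   get_last_result_items()                              # -> tag_items (overlay wins)
#   restore_previous_result_table()                      # clears overlay; back to search_items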
def get_display_table() -> Optional[Any]:
"""Get the current display overlay table.
Returns:
The ResultTable object, or None if no overlay table is set
"""
return _DISPLAY_TABLE
def get_last_result_table() -> Optional[Any]:
"""Get the current last result table.
Returns:
The ResultTable object, or None if no table is set
"""
return _LAST_RESULT_TABLE
def get_last_result_items() -> List[Any]:
"""Get the items available for @N selection.
Returns items from display/action commands (get-tag, delete-tag, etc.) if available,
otherwise returns items from the last search command. This ensures @N selection
works for both display operations and search results.
Returns:
List of items, or empty list if no prior results
"""
# Prioritize items from display commands (get-tag, delete-tag, etc.)
# These are available for immediate @N selection
if _DISPLAY_ITEMS:
return _DISPLAY_ITEMS
# Fall back to items from last search/selectable command
return _LAST_RESULT_ITEMS
def get_last_result_table_source_command() -> Optional[str]:
"""Get the source command from the last displayed result table.
Returns:
Command name (e.g., 'download-data') or None if not set
"""
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_command'):
return _LAST_RESULT_TABLE.source_command
return None
def get_last_result_table_source_args() -> List[str]:
"""Get the base source arguments from the last displayed result table.
Returns:
List of arguments (e.g., ['https://example.com']) or empty list
"""
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_args'):
return _LAST_RESULT_TABLE.source_args or []
return []
def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[str]]:
"""Get the selection arguments for a specific row in the last result table.
Args:
row_index: Index of the row (0-based)
Returns:
Selection arguments (e.g., ['-item', '3']) or None
"""
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'rows'):
if 0 <= row_index < len(_LAST_RESULT_TABLE.rows):
row = _LAST_RESULT_TABLE.rows[row_index]
if hasattr(row, 'selection_args'):
return row.selection_args
return None
def set_current_stage_table(result_table: Optional[Any]) -> None:
"""Store the current pipeline stage table for @N expansion.
Used by cmdlets that display tabular results (e.g., download-data with formats)
to make their result table available for @N expansion logic.
Does NOT push to history - purely for command expansion in the current pipeline.
Args:
result_table: The ResultTable object (or None to clear)
"""
global _CURRENT_STAGE_TABLE
_CURRENT_STAGE_TABLE = result_table
def get_current_stage_table_source_command() -> Optional[str]:
"""Get the source command from the current pipeline stage table.
Returns:
Command name (e.g., 'download-data') or None
"""
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_command'):
return _CURRENT_STAGE_TABLE.source_command
return None
def get_current_stage_table_source_args() -> List[str]:
"""Get the source arguments from the current pipeline stage table.
Returns:
List of arguments or empty list
"""
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_args'):
return _CURRENT_STAGE_TABLE.source_args or []
return []
def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[str]]:
"""Get the selection arguments for a row in the current pipeline stage table.
Args:
row_index: Index of the row (0-based)
Returns:
Selection arguments or None
"""
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'rows'):
if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows):
row = _CURRENT_STAGE_TABLE.rows[row_index]
if hasattr(row, 'selection_args'):
return row.selection_args
return None
def clear_last_result() -> None:
"""Clear the stored last result table and items."""
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS
_LAST_RESULT_TABLE = None
_LAST_RESULT_ITEMS = []
def emit_list(objects: List[Any]) -> None:
"""Emit a list of PipeObjects to the next pipeline stage.
This allows cmdlets to emit multiple results that are tracked as a list,
enabling downstream cmdlets to process all of them or filter by metadata.
Args:
objects: List of PipeObject instances or dicts to emit
"""
if _CURRENT_CONTEXT is not None:
_CURRENT_CONTEXT.emit(objects)
else:
_PIPE_EMITS.append(objects)

183
pyproject.toml Normal file
View File

@@ -0,0 +1,183 @@
[build-system]
requires = ["setuptools>=65.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "medeia-macina"
version = "0.1.0"
description = "Comprehensive media management and search platform with support for local files, Hydrus database, torrents, books, and P2P networks"
readme = "README.md"
requires-python = ">=3.9"
license = {text = "MIT"}
authors = [
{name = "Your Name", email = "your.email@example.com"}
]
keywords = ["media", "search", "management", "hydrus", "download", "cli", "tui"]
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: End Users/Desktop",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Multimedia",
"Topic :: Internet",
]
dependencies = [
# Core CLI and TUI frameworks
"typer>=0.9.0",
"prompt-toolkit>=3.0.0",
"textual>=0.30.0",
# Media processing and downloading
"yt-dlp>=2023.11.0",
"yt-dlp-ejs", # EJS challenge solver scripts for YouTube JavaScript challenges
"requests>=2.31.0",
"httpx>=0.25.0",
"ffmpeg-python>=0.2.0",
# Document and data handling
"PyPDF2>=3.0.0",
"img2pdf>=0.6.0",
"mutagen>=1.46.0",
"cbor2>=4.0",
# Image and media support
"Pillow>=10.0.0",
"python-bidi>=0.4.2",
# Metadata extraction and processing
"musicbrainzngs>=0.7.0",
"beautifulsoup4>=4.12.0",
"lxml>=4.9.0",
# Advanced searching and libraries
"libgen-api>=1.0.0",
"aioslsk>=1.6.0",
"imdbinfo>=0.1.10",
# Encryption and security
"pycryptodome>=3.18.0",
# Data processing
"bencode3",
"tqdm>=4.66.0",
# Browser automation
"playwright>=1.40.0",
# Development and utilities
"python-dateutil>=2.8.0",
]
[project.optional-dependencies]
dev = [
# Testing
"pytest>=7.4.0",
"pytest-cov>=4.1.0",
"pytest-asyncio>=0.21.0",
# Code quality
"black>=23.11.0",
"flake8>=6.1.0",
"isort>=5.12.0",
"mypy>=1.7.0",
"pylint>=3.0.0",
# Documentation
"sphinx>=7.2.0",
"sphinx-rtd-theme>=1.3.0",
# Debugging and profiling
"ipython>=8.17.0",
"ipdb>=0.13.0",
"memory-profiler>=0.61.0",
# Version control and CI/CD helpers
"pre-commit>=3.5.0",
]
[project.scripts]
mm = "medeia_macina.cli_entry:main"
medeia = "medeia_macina.cli_entry:main"
[project.urls]
Homepage = "https://github.com/yourusername/medeia-macina"
Documentation = "https://medeia-macina.readthedocs.io"
Repository = "https://github.com/yourusername/medeia-macina.git"
Issues = "https://github.com/yourusername/medeia-macina/issues"
[tool.setuptools]
packages = ["cmdlets", "helper", "TUI", "medeia_macina"]
[tool.black]
line-length = 100
target-version = ['py39', 'py310', 'py311', 'py312']
include = '\.pyi?$'
extend-exclude = '''
/(
# directories
\.eggs
| \.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
| __pycache__
)/
'''
[tool.isort]
profile = "black"
line_length = 100
target_version = ["py39", "py310", "py311", "py312"]
[tool.mypy]
python_version = "3.9"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = false
disallow_incomplete_defs = false
check_untyped_defs = false
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
[tool.pylint.messages_control]
disable = [
"C0330", "C0326", # Bad whitespace
"R0913", # Too many arguments
"R0914", # Too many local variables
]
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
addopts = "-v --cov=. --cov-report=html --cov-report=term-missing"
[tool.coverage.run]
branch = true
omit = [
"*/tests/*",
"*/__main__.py",
]
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

29
requirements-dev.txt Normal file
View File

@@ -0,0 +1,29 @@
# Development dependencies for Medeia-Macina
# Install with: pip install -r requirements-dev.txt
# Main requirements
-r requirements.txt
# Testing
pytest>=7.4.0
pytest-cov>=4.1.0
pytest-asyncio>=0.21.0
# Code quality
black>=23.11.0
flake8>=6.1.0
isort>=5.12.0
mypy>=1.7.0
pylint>=3.0.0
# Documentation
sphinx>=7.2.0
sphinx-rtd-theme>=1.3.0
# Debugging and profiling
ipython>=8.17.0
ipdb>=0.13.0
memory-profiler>=0.61.0
# Version control and CI/CD helpers
pre-commit>=3.5.0

43
requirements.txt Normal file
View File

@@ -0,0 +1,43 @@
# Core CLI and TUI frameworks
typer>=0.9.0
prompt-toolkit>=3.0.0
textual>=0.30.0
# Media processing and downloading
yt-dlp>=2023.11.0
requests>=2.31.0
httpx>=0.25.0
ffmpeg-python>=0.2.0
# Document and data handling
PyPDF2>=3.0.0
img2pdf>=0.6.0
mutagen>=1.46.0
cbor2>=4.0
# Image and media support
Pillow>=10.0.0
python-bidi>=0.4.2
# Metadata extraction and processing
musicbrainzngs>=0.7.0
beautifulsoup4>=4.12.0
lxml>=4.9.0
# Advanced searching and libraries
libgen-api>=1.0.0
aioslsk>=1.6.0
imdbinfo>=0.1.10
# Encryption and security (if needed by Crypto usage)
pycryptodome>=3.18.0
# Data processing
bencode3
tqdm>=4.66.0
# Browser automation (for web scraping if needed)
playwright>=1.40.0
# Development and utilities
python-dateutil>=2.8.0

1228
result_table.py Normal file

File diff suppressed because it is too large Load Diff

39
setup.py Normal file
View File

@@ -0,0 +1,39 @@
"""
Setup configuration for Medeia-Macina.
Medeia-Macina is a comprehensive media and data management system with support for:
- Video downloading from multiple sources (YouTube, etc.)
- Local and cloud-based file storage
- Advanced metadata and tag management
- Full-featured TUI and CLI interfaces
"""
from setuptools import setup, find_packages
with open("requirements.txt") as f:
requirements = [line.strip() for line in f if line.strip() and not line.startswith("#")]
setup(
name="medeia-macina",
version="0.1.0",
description="Comprehensive media and data management system",
author="Anonymous",
python_requires=">=3.9",
packages=find_packages(exclude=["tests", "*.tests"]),
install_requires=requirements,
entry_points={
"console_scripts": [
"mm=medeia_macina.cli_entry:main",
"medeia=medeia_macina.cli_entry:main",
],
},
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
)