# Medios-Macina/cmdlets/add_file.py
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import shutil  # needed by the export-mode move below; missing from the original imports
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
    Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
    extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
    merge_sequences, extract_relationships, extract_duration
)
from ._shared import collapse_namespace_tags
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file

# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS

# Initialize the file storage system
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
    suffix = media_path.suffix.lower()
    if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
        return 'audio'
    if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
        return 'video'
    if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
        return 'image'
    if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
        return 'document'
    return 'other'
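
# Illustrative behavior (file names invented for the example):
#   _guess_media_kind_from_suffix(Path("creep.flac"))  -> 'audio'
#   _guess_media_kind_from_suffix(Path("clip.webm"))   -> 'video'
#   _guess_media_kind_from_suffix(Path("scan.xyz"))    -> 'other'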
def _resolve_media_kind(result: Any, media_path: Path) -> str:
    if isinstance(result, models.PipeObject):
        if getattr(result, 'media_kind', None):
            return str(result.media_kind)
    elif isinstance(result, dict):
        media_kind = result.get('media_kind')
        if media_kind:
            return str(media_kind)
        metadata = result.get('metadata')
        if isinstance(metadata, dict) and metadata.get('media_kind'):
            return str(metadata['media_kind'])
    return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
    # For local origin, try to read from the local database first
    if origin and origin.lower() == "local" and config:
        try:
            from helper.local_library import LocalLibraryDB
            from config import get_local_storage_path
            try:
                db_root = get_local_storage_path(config)
            except Exception:
                db_root = None
            if db_root:
                try:
                    with LocalLibraryDB(Path(db_root)) as db:
                        # Get tags and metadata from the database
                        tags = db.get_tags(media_path) or []
                        metadata = db.get_metadata(media_path) or {}
                        known_urls = metadata.get("known_urls") or []
                        file_hash = metadata.get("hash")
                        if tags or known_urls or file_hash:
                            debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
                            return None, file_hash, tags, known_urls
                except Exception as exc:
                    log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
        except Exception:
            pass
    # Fall back to sidecar file lookup
    try:
        sidecar_path = find_sidecar(media_path)
    except Exception:
        sidecar_path = None
    if not sidecar_path or not sidecar_path.exists():
        return None, None, [], []
    try:
        hash_value, tags, known_urls = read_sidecar(sidecar_path)
        return sidecar_path, hash_value, tags or [], known_urls or []
    except Exception as exc:
        log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
        return sidecar_path, None, [], []
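
# Return shape, for reference: (sidecar_path, hash, tags, known_urls).
# A database hit returns sidecar_path=None (the DB, not a file, was the source);
# a missing sidecar returns (None, None, [], []).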
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
    candidate = None
    if isinstance(result, models.PipeObject):
        candidate = result.file_hash
    elif isinstance(result, dict):
        candidate = result.get('file_hash') or result.get('hash')
    candidate = candidate or fallback_hash
    if candidate:
        return str(candidate)
    try:
        return sha256_file(file_path)
    except Exception as exc:
        log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
        return None
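
# Fallback order: hash carried on the piped result -> hash from the sidecar/DB
# bundle -> a fresh SHA-256 of the file on disk (or None if even that fails).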
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
    targets = [
        media_path.parent / (media_path.name + '.metadata'),
        media_path.parent / (media_path.name + '.notes'),
        media_path.parent / (media_path.name + '.tags'),
        media_path.parent / (media_path.name + '.tags.txt'),
    ]
    targets.extend(extra_paths)
    for target in targets:
        if not target:
            continue
        try:
            path_obj = Path(target)
            if path_obj.exists():
                path_obj.unlink()
        except Exception:
            continue
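
# For example, _cleanup_sidecar_files(Path("song.mp3")) removes song.mp3.metadata,
# song.mp3.notes, song.mp3.tags, and song.mp3.tags.txt when present.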
def _show_local_result_table(file_hash: Optional[str], config: Dict[str, Any]) -> None:
    """Run search-file by hash to display the newly added local file in a table."""
    if not file_hash:
        return
    try:
        from cmdlets import search_file as search_cmd
        temp_ctx = models.PipelineStageContext(0, 1)
        saved_ctx = ctx.get_stage_context()
        ctx.set_stage_context(temp_ctx)
        try:
            # Call the cmdlet exactly as the user would type it: search-file "hash:...,store:local"
            search_cmd._run(None, [f"hash:{file_hash},store:local"], config)
            try:
                table = ctx.get_last_result_table()
                if table is not None:
                    log("")
                    log(table.format_plain())
            except Exception:
                pass
        finally:
            ctx.set_stage_context(saved_ctx)
    except Exception as exc:
        debug(f"[add-file] Skipped search-file display: {exc}")
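
# After a successful local import this prints the same table the user would get
# by typing: search-file "hash:<sha256>,store:local"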
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: list[str],
    known_urls: list[str],
    file_hash: Optional[str],
    relationships: Optional[Dict[str, Any]],
    duration: Optional[float],
    media_kind: str,
) -> None:
    payload = {
        'hash': file_hash,
        'known_urls': known_urls,
        'relationships': relationships or [],
        'duration': duration,
        'size': None,
        'ext': dest_path.suffix.lower(),
        'media_type': media_kind,
        'media_kind': media_kind,
    }
    try:
        payload['size'] = dest_path.stat().st_size
    except OSError:
        payload['size'] = None
    try:
        debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
        db_path = Path(library_root) / ".downlow_library.db"
        debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
        debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
        debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
        with LocalLibraryDB(library_root) as db:
            # Use the optimized single-transaction save
            debug(f"[_persist_local_metadata] Saving metadata and {len(tags)} tags to DB")
            try:
                db.save_file_info(dest_path, payload, tags)
                debug(f"[_persist_local_metadata] ✅ File info saved to DB")
            except Exception as exc:
                log(f"[_persist_local_metadata] ❌ Failed to save file info: {exc}", file=sys.stderr)
                raise
        # NOTE: Sidecar files are intentionally NOT created for local storage.
        # Local storage uses the database as the primary source, not sidecar files.
        debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
    except Exception as exc:
        log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
        import traceback
        log(traceback.format_exc(), file=sys.stderr)
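
# A typical payload (values invented for illustration):
#   {'hash': 'ab12...', 'known_urls': ['https://example.org/track'],
#    'relationships': [], 'duration': 241.3, 'size': 8273941,
#    'ext': '.mp3', 'media_type': 'audio', 'media_kind': 'audio'}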
def _handle_local_transfer(
    media_path: Path,
    destination_root: Path,
    result: Any,
    config: Optional[Dict[str, Any]] = None,
    export_mode: bool = False,
) -> Tuple[int, Optional[Path]]:
    """Transfer a file to local storage and return (exit_code, destination_path).

    Args:
        media_path: Path to the source file
        destination_root: Destination directory
        result: Result object with metadata
        config: Configuration dictionary
        export_mode: If True, rename by title and skip database persistence (export);
            if False, rename to the file's hash and persist metadata (library import)

    Returns:
        Tuple of (exit_code, destination_path)
        - exit_code: 0 on success, 1 on failure
        - destination_path: Path to the moved file on success, None on failure
    """
    destination_root = destination_root.expanduser()
    try:
        destination_root.mkdir(parents=True, exist_ok=True)
    except Exception as exc:
        log(f"Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    tags_from_result = extract_tags_from_result(result)
    urls_from_result = extract_known_urls_from_result(result)

    # Get origin from the result if available
    result_origin = None
    if hasattr(result, "origin"):
        result_origin = result.origin
    elif isinstance(result, dict):
        result_origin = result.get("origin") or result.get("source")
    sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)

    # Normalize all title tags to use spaces instead of underscores BEFORE merging,
    # so that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title.
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag

    tags_from_result = collapse_namespace_tags([normalize_title_tag(t) for t in tags_from_result], "title", prefer="last")
    sidecar_tags = collapse_namespace_tags([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last")

    # Merge tags carefully: if the result already carries a title tag, drop the
    # sidecar's title tags so the merge cannot produce duplicate title: entries.
    has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
    if has_url_title:
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
        merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
    else:
        # No title from the result, so use all sidecar tags
        merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
    merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
    relationships = extract_relationships(result)
    duration = extract_duration(result)
    # Skip title-based renaming in library mode (hash-based names); export mode renames by title.
    resolved_hash: Optional[str] = None  # also referenced after the try block, so define it up front
    try:
        if export_mode:
            title_tag = next((t for t in merged_tags if str(t).strip().lower().startswith("title:")), None)
            title_value = ""
            if title_tag:
                title_value = title_tag.split(":", 1)[1].strip()
            if not title_value:
                title_value = media_path.stem.replace("_", " ").strip()
            # Sanitize the filename
            safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
            base_name = safe_title or media_path.stem
            new_name = base_name + media_path.suffix
            target_path = destination_root / new_name
            destination_root.mkdir(parents=True, exist_ok=True)
            if target_path.exists():
                from helper.utils import unique_path
                target_path = unique_path(target_path)
            shutil.move(str(media_path), target_path)
            # Move any sidecar files alongside
            possible_sidecars = [
                media_path.with_suffix(media_path.suffix + ".json"),
                media_path.with_name(media_path.name + ".tags"),
                media_path.with_name(media_path.name + ".tags.txt"),
                media_path.with_name(media_path.name + ".metadata"),
                media_path.with_name(media_path.name + ".notes"),
            ]
            for sc in possible_sidecars:
                try:
                    if sc.exists():
                        suffix_part = sc.name.replace(media_path.name, "", 1)
                        dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
                        dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
                        shutil.move(str(sc), dest_sidecar)
                except Exception:
                    pass
            media_path = target_path
            dest_file = str(target_path)
        else:
            # Ensure the filename is the hash when adding to local storage
            resolved_hash = _resolve_file_hash(result, sidecar_hash, media_path)
            hashed_move_done = False
            if resolved_hash:
                hashed_name = resolved_hash + media_path.suffix
                target_path = destination_root / hashed_name
                try:
                    if target_path.exists():
                        target_path.unlink()
                except Exception:
                    pass
                if media_path != target_path:
                    media_path = media_path.rename(target_path)
                    hashed_move_done = True
            if hashed_move_done and media_path.parent.samefile(destination_root):
                # Already placed at the final destination with the hash name; skip the extra upload/move
                dest_file = str(media_path)
            else:
                dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
    except Exception as exc:
        log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    dest_path = Path(dest_file)
    file_hash = _resolve_file_hash(result, resolved_hash, dest_path)
    media_kind = _resolve_media_kind(result, dest_path)

    # If we have a title tag, keep it. Otherwise, derive one from the filename.
    has_title = any(str(t).strip().lower().startswith("title:") for t in merged_tags)
    final_tags = collapse_namespace_tags(merged_tags, "title", prefer="last")
    if not has_title:
        filename_title = dest_path.stem.replace("_", " ").strip()
        if filename_title:
            final_tags.insert(0, f"title:{filename_title}")

    if not export_mode:
        _persist_local_metadata(destination_root, dest_path, final_tags, merged_urls, file_hash, relationships, duration, media_kind)
        _cleanup_sidecar_files(media_path, sidecar_path)
        _show_local_result_table(file_hash, config or {})
    else:
        debug(f"✅ Exported to destination: {dest_path}")
    return 0, dest_path
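
# Sketch of the two modes (paths are illustrative):
#   _handle_local_transfer(Path("creep.mp3"), Path("~/library"), result)
#       -> library import: file is renamed to <sha256>.mp3, metadata is saved to the DB
#   _handle_local_transfer(Path("creep.mp3"), Path("D:/export"), result, export_mode=True)
#       -> export: file is renamed to its title (e.g. "Radiohead - Creep.mp3"),
#          sidecars move alongside, nothing is written to the DB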
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Upload/copy a file to the specified location.

    Returns 0 on success, non-zero on failure.
    """
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    debug("Starting add-file cmdlet")

    # Handle a list of results (from piped commands that emit multiple items)
    if isinstance(result, list):
        debug(f"Processing {len(result)} piped files")
        success_count = 0
        for item in result:
            exit_code = _run(item, _args, config)
            if exit_code == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Parse arguments using the CMDLET spec
    parsed = parse_cmdlet_args(_args, CMDLET)
    location: Optional[str] = None
    provider_name: Optional[str] = None
    delete_after_upload = False

    # Check whether a -path argument was provided
    path_arg = parsed.get("path")
    if path_arg:
        path_value = Path(str(path_arg).strip())
        # If there is no piped result, treat -path as the source file (existing behavior)
        if result is None:
            if not path_value.exists():
                log(f"❌ File not found: {path_value}")
                return 1
            result = {"target": str(path_value), "origin": "wild"}
            log(f"Using direct file path: {path_value}")
        else:
            # Piped result present: treat -path as the destination (export)
            if not path_value.exists():
                try:
                    path_value.mkdir(parents=True, exist_ok=True)
                except Exception as exc:
                    log(f"❌ Cannot create destination directory {path_value}: {exc}", file=sys.stderr)
                    return 1
            location = str(path_value)

    # Get location from parsed args - uses SharedArgs.STORAGE, so the key is "storage"
    storage_arg = parsed.get("storage")
    if location is None:
        location = storage_arg
        if location:
            location = str(location).lower().strip()
    elif storage_arg:
        # User provided both -path (as destination) and -storage; error unless they agree
        storage_str = str(storage_arg).lower().strip()
        if storage_str != str(location).lower():
            log(f"❌ Conflicting destinations: -path '{location}' vs -storage '{storage_str}'", file=sys.stderr)
            return 1
    # Get the file provider from parsed args
    provider_name = parsed.get("provider")
    if provider_name:
        provider_name = str(provider_name).lower().strip()

    # Check for the delete flag (presence in the parsed dict means it was provided)
    delete_after_upload = "delete" in parsed

    # Either storage or provider must be specified, but not both
    if location is None and provider_name is None:
        log("Either -storage or -provider must be specified")
        log(" -storage options: 'hydrus', 'local', 'matrix', or a directory path")
        log(" -provider options: '0x0'")
        return 1
    if location is not None and provider_name is not None:
        log("❌ Cannot specify both -storage and -provider")
        return 1

    # Validate location (storage backends); 'matrix' must pass this early check
    # so the matrix branch further below is reachable
    is_valid_location = False
    if location is not None:
        valid_locations = {'hydrus', 'local', 'matrix'}
        is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
    if location is not None and not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    # Extract tags/known URLs from pipeline objects if available
    pipe_object_tags = extract_tags_from_result(result)
    if pipe_object_tags:
        debug(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
    pipe_known_urls = extract_known_urls_from_result(result)

    # Resolve the media path from the piped result.
    # Support both object attributes (getattr) and dict keys (get).
    target = None
    origin = None
    # Try object attributes first
    if hasattr(result, "target"):
        target = result.target
    elif hasattr(result, "path"):
        target = result.path
    elif hasattr(result, "file_path"):
        target = result.file_path
    # Fall back to dict keys if object attributes failed
    elif isinstance(result, dict):
        target = (result.get("target") or result.get("path") or result.get("file_path") or
                  result.get("__file_path") or result.get("__path") or result.get("__target"))
    # Get origin to detect Hydrus files
    if hasattr(result, "origin"):
        origin = result.origin
    elif hasattr(result, "source"):
        origin = result.source
    elif isinstance(result, dict):
        origin = result.get("origin") or result.get("source") or result.get("__source")

    # Convert target to a string and preserve URLs (don't let Path() mangle them)
    target_str = str(target) if target else None
    # Check whether this is a playlist item that needs to be downloaded first
    is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
    if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
        # This is a playlist item URL - download it first
        log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
        # Extract the item number if available
        item_num = None
        if "__action" in result and result["__action"].startswith("playlist-item:"):
            item_num = result["__action"].split(":")[1]
        elif "index" in result:
            item_num = result["index"]
        # Call download-data to download this specific item,
        # passing the item number so it knows which track to download
        from cmdlets import download_data as dl_module
        # Capture emissions from download-data so we can process them
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            # Also emit to original so the user sees progress/output if needed.
            # Since add-file is usually terminal, we might not need to:
            # original_emit(obj)
        # Temporarily hook the pipeline emit function
        ctx.emit = capture_emit
        try:
            if item_num:
                # Pass a marker dict to tell download-data which item to get
                download_result = dl_module._run(
                    {
                        "__playlist_url": str(target_str),
                        "__playlist_item": int(item_num)
                    },
                    [],
                    config
                )
            else:
                # Fallback: just download the URL (will show all items)
                download_result = dl_module._run(None, [str(target_str)], config)
        finally:
            # Restore the original emit function
            ctx.emit = original_emit
        if download_result != 0:
            log(f"❌ Failed to download playlist item", file=sys.stderr)
            return 1
        log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
        # Process the downloaded files recursively; recursing through add-file
        # ensures tags and metadata from download-data are applied
        success_count = 0
        for res in captured_results:
            if _run(res, _args, config) == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Determine media_path from the result
    media_path: Optional[Path] = None
    is_hydrus_file = origin and origin.lower() == "hydrus"
    if target_str:
        # Check whether it is a URL or a Hydrus hash
        if target_str.lower().startswith(("http://", "https://")):
            media_path = None  # Will handle as Hydrus file below
        elif not is_hydrus_file:
            # Only treat as a local path if not a Hydrus file
            media_path = Path(target_str)
    if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
        # Check whether this is a format object from download-data
        if isinstance(result, dict) and result.get('format_id') is not None:
            log("❌ Format object received, but add-file expects a downloaded file")
            log(f" Tip: Use @N to automatically select and download the format")
            log(f" Streamlined workflow:")
            log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
            log(f" (The @N automatically expands to download-data \"URL\" -item N)")
            return 1
        log("❌ File not found: provide a piped file result or local file path")
        return 1
    # If this is a Hydrus file, fetch the actual file path from Hydrus
    if is_hydrus_file and target_str:
        log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
        try:
            from helper import hydrus
            # Get the Hydrus client
            client = hydrus.get_client(config)
            if not client:
                log(f"❌ Hydrus client unavailable", file=sys.stderr)
                return 1
            # target_str is the hash - ask Hydrus for the actual file path
            file_hash = target_str
            # Call the /get_files/file_path endpoint to get the actual file path
            response = client.get_file_path(file_hash)
            if not response or not isinstance(response, dict):
                log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
                return 1
            file_path_str = response.get("path")
            if not file_path_str:
                log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return 1
            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return 1
            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            import traceback
            log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
            return 1
    # Generic URL handler: the target is a URL and no local path has been resolved yet.
    # This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
    if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
        log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
        from cmdlets import download_data as dl_module
        dl_args = []
        if location:
            dl_args.extend(["-storage", location])
        # Map provider 0x0 to storage 0x0 for download-data
        if provider_name == "0x0":
            dl_args.extend(["-storage", "0x0"])
        # Capture results from download-data so we can add them to the DB
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            original_emit(obj)
        ctx.emit = capture_emit
        try:
            ret_code = dl_module._run(result, dl_args, config)
        finally:
            ctx.emit = original_emit
        if ret_code != 0:
            return ret_code
        # Process the downloaded files recursively to add them to the DB
        if captured_results:
            log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr)
            success_count = 0
            for res in captured_results:
                # Recursively call add-file with the downloaded result
                if _run(res, _args, config) == 0:
                    success_count += 1
            return 0 if success_count > 0 else 1
        return 0
    if media_path is None:
        log("File path could not be resolved")
        return 1
    if not media_path.exists() or not media_path.is_file():
        log(f"File not found: {media_path}")
        return 1

    # Validate the file type - only accept Hydrus-supported files
    file_extension = media_path.suffix.lower()
    if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
        log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
        log(f"Hydrus supports the following file types:", file=sys.stderr)
        # Display by category from hydrus_wrapper
        for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
            ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
            log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
        log(f"Skipping this file: {media_path.name}", file=sys.stderr)
        return 1
    # Handle based on provider or storage
    if provider_name is not None:
        # Use a file provider (e.g., 0x0.st)
        from helper.search_provider import get_file_provider
        log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
        try:
            file_provider = get_file_provider(provider_name, config)
            if file_provider is None:
                log(f"File provider '{provider_name}' not available", file=sys.stderr)
                return 1
            hoster_url = file_provider.upload(media_path)
            log(f"File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
            # Associate the URL with the file in Hydrus if possible
            current_hash = locals().get('file_hash')
            if not current_hash:
                current_hash = _resolve_file_hash(result, None, media_path)
            if current_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(current_hash, hoster_url)
                        debug(f"Associated URL with file hash {current_hash}", file=sys.stderr)
                except Exception as exc:
                    log(f"Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
        except Exception as exc:
            log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # Handle storage-based operations (location is not None here)
    valid_locations = {'hydrus', 'local', 'matrix'}
    is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
    if not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path")
        return 1
    if location == 'local':
        try:
            from config import get_local_storage_path
            resolved_dir = get_local_storage_path(config)
        except Exception:
            resolved_dir = None
        if not resolved_dir:
            resolved_dir = config.get("LocalDir") or config.get("OutputDir")
        if not resolved_dir:
            log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
            return 1
        debug(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
        # After a successful local transfer, emit a result for pipeline continuation.
        # This allows downstream commands like add-tags to chain automatically.
        if exit_code == 0 and dest_path:
            # Extract tags from the result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract the title from the original result, falling back to the filename
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit a result for local files, even with no tags,
            # so @N selection and piping to downstream commands still work
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so a downstream @N doesn't try to re-run download-data;
            # the next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    elif is_local_path:
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1
        log(f"Moving to local path: {destination_root}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config, export_mode=True)
        # After a successful local transfer, emit a result for pipeline continuation
        if exit_code == 0 and dest_path:
            # Extract tags from the result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract the title from the original result, falling back to the filename
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit a result for local files, even with no tags,
            # so @N selection and piping to downstream commands still work
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so a downstream @N doesn't try to re-run download-data
            ctx.set_current_stage_table(None)
        return exit_code
    elif location == 'matrix':
        log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr)
        try:
            result_url = storage["matrix"].upload(media_path, config=config)
            log(f"Matrix: {result_url}", file=sys.stderr)
            result_dict = create_pipe_object_result(
                source='matrix',
                identifier=result_url,
                file_path=str(media_path),
                cmdlet_name='add-file',
                title=media_path.name,
                target=result_url
            )
            ctx.emit(result_dict)
        except Exception as exc:
            log(f"Failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # location == 'hydrus'
    # Compute the file hash to check whether it is already in Hydrus
    log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
    log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
    try:
        file_hash = sha256_file(media_path)
    except Exception as exc:
        log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
        return 1
    debug(f"File hash: {file_hash}", file=sys.stderr)

    # Read sidecar tags and known URLs first (for tagging)
    sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
    if sidecar_path:
        log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
        log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
        if sidecar_tags:
            log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
        if sidecar_urls:
            log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
    else:
        log(f"No sidecar found for {media_path.name}", file=sys.stderr)

    # Normalize all title tags to use spaces instead of underscores BEFORE merging,
    # so that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag

    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]

    # Merge tags from the PipeObject with tags from the sidecar.
    # NOTE: Remove ALL existing title tags and use only a filename-based title;
    # the filename is the source of truth for the title.
    tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
                           if not str(t).strip().lower().startswith("title:")]
    # Ensure ONE title tag based on the actual filename
    filename_title = media_path.stem.replace("_", " ").strip()
    if filename_title:
        tags = [f"title:{filename_title}"] + tags_without_titles
    else:
        tags = tags_without_titles
    known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
    if pipe_object_tags:
        log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
    # Write metadata to the file before uploading (only for local storage, not for Hydrus;
    # Hydrus stores tags separately, so the file itself need not be modified)
    if location != 'hydrus':
        try:
            if tags:
                # Determine the file kind from the extension
                file_kind = ''
                sfx = media_path.suffix.lower()
                if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
                    file_kind = 'audio'
                elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
                    file_kind = 'video'
                if embed_metadata_in_file(media_path, tags, file_kind):
                    log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
                else:
                    log(f"Note: Could not embed metadata in file (may not be a supported format)", file=sys.stderr)
        except Exception as exc:
            log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
    else:
        log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)

    # Use the FileStorage backend to upload to Hydrus
    try:
        file_hash = storage["hydrus"].upload(
            media_path,
            config=config,
            tags=tags,
        )
        log(f"Hydrus: {file_hash}", file=sys.stderr)
    except Exception as exc:
        log(f"Failed: {exc}", file=sys.stderr)
        return 1
    # Associate known URLs in Hydrus metadata
    url_count = 0
    if known_urls:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                for url in known_urls:
                    u = str(url or "").strip()
                    if not u:
                        continue
                    try:
                        client.associate_url(file_hash, u)
                    except Exception as exc:
                        log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
                        continue
                    url_count += 1
        except Exception as exc:
            log(f"Failed to associate URLs: {exc}", file=sys.stderr)
    if url_count:
        log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
    else:
        log(f"No URLs to associate", file=sys.stderr)
    _cleanup_sidecar_files(media_path, sidecar_path)

    # Update the in-memory result for downstream pipes
    try:
        # Only update piped result objects; direct -path usage may have a dummy result
        setattr(result, "hash_hex", file_hash)
        # Preserve media_kind for downstream commands (e.g., open)
        if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
            # Try to infer media_kind from the file extension, or keep the existing one
            suffix = media_path.suffix.lower()
            if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
                setattr(result, "media_kind", "document")
        if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
            cols = list(getattr(result, "columns"))
            if ("Hash", file_hash) not in cols:
                cols.append(("Hash", file_hash))
            setattr(result, "columns", cols)
    except Exception:
        pass
    # If the -delete flag is set, delete the file and .tags after a successful upload.
    # Also delete if the file is a temporary file from merge-file (contains .dlhx_ or "(merged)")
    is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
    if delete_after_upload or is_temp_merge:
        log(f"Deleting local files (as requested or temp file)...", file=sys.stderr)
        try:
            media_path.unlink()
            log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
        except OSError as exc:
            log(f"Failed to delete file: {exc}", file=sys.stderr)
        # Delete the .tags sidecar if it exists
        if sidecar_path is not None:
            try:
                sidecar_path.unlink()
                log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
            except OSError as exc:
                log(f"Failed to delete sidecar: {exc}", file=sys.stderr)

    # Decide whether to surface search-file results at the end of the pipeline
    stage_ctx = ctx.get_stage_context()
    is_storage_target = location is not None
    should_display = is_storage_target and (stage_ctx is None or stage_ctx.is_last_stage)
    if (not should_display) or not file_hash:
        log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
    # Emit a result for Hydrus uploads so downstream commands know about it
    if location == 'hydrus':
        # Extract the title from the original result, falling back to the filename
        result_title = extract_title_from_result(result) or media_path.name
        result_dict = create_pipe_object_result(
            source='hydrus',
            identifier=file_hash,
            file_path=f"hydrus:{file_hash}",
            cmdlet_name='add-file',
            title=result_title,
            file_hash=file_hash,
            extra={
                'storage_source': 'hydrus',
                'hydrus_hash': file_hash,
                'tags': tags,
                'known_urls': known_urls,
            }
        )
        ctx.emit(result_dict)
        # Clear the stage table so a downstream @N doesn't try to re-run download-data;
        # the next stage will use these Hydrus file results, not format objects
        ctx.set_current_stage_table(None)

    # If this is the last stage (or not in a pipeline), show the file via search-file
    if should_display and file_hash:
        try:
            from cmdlets import search_file as search_cmdlet
            search_cmdlet._run(None, [f"hash:{file_hash}"], config)
        except Exception:
            debug("search-file lookup after add-file failed", file=sys.stderr)
    elif file_hash:
        # Not displaying search results here, so report completion normally
        log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)

    return 0
CMDLET = Cmdlet(
    name="add-file",
    summary="Upload a media file to a specified location (Hydrus, a file provider, or a local directory).",
    usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
    args=[
        CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
        SharedArgs.STORAGE,  # For hydrus, local, matrix, or directory paths
        CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
        CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
    ],
    details=[
        "- Storage location options (use -storage):",
        " hydrus: Upload to the Hydrus database with metadata tagging",
        " local: Copy the file to the configured local library",
        " matrix: Upload via the Matrix storage backend",
        " <path>: Copy the file to the specified directory",
        "- File provider options (use -provider):",
        " 0x0: Upload to 0x0.st for temporary hosting with a public URL",
        "- Accepts files of the official Hydrus-supported types: images, animations, videos, audio, applications, projects, and archives.",
        "- When uploading to Hydrus: adds tags from the .tags sidecar and associates known_urls",
        "- When using a file provider: uploads to the service and adds the URL to the sidecar",
        "- When copying locally: copies the file with its original metadata preserved",
        "- Use the -delete flag to automatically delete the file and .tags after a successful operation.",
    ],
)
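
# Example invocations (illustrative pipelines; the URL and paths are made up):
#   add-file -path C:\Music\creep.mp3 -storage hydrus
#   download-data "https://example.org/video" | @1 | add-file -storage local
#   search-file -provider openlibrary "dune" | @1 | add-file -storage local
#   download-data "https://example.org/video" | @1 | add-file -provider 0x0 -delete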