# Medios-Macina/cmdlets/add_file.py
# (source snapshot: 2025-11-25 20:09:33 -08:00 — 910 lines, 39 KiB, Python)

from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
merge_sequences, extract_relationships, extract_duration
)
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file
# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
# Initialize file storage system (module-level singleton; backends are looked
# up by key, e.g. storage["local"] / storage["hydrus"])
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
suffix = media_path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
def _resolve_media_kind(result: Any, media_path: Path) -> str:
    """Return the media kind for a result, preferring explicit metadata.

    Order of precedence: a PipeObject's media_kind attribute, a dict's
    'media_kind' key, a dict's nested metadata['media_kind'], and finally
    a guess from the file extension.
    """
    if isinstance(result, models.PipeObject):
        kind = getattr(result, 'media_kind', None)
        if kind:
            return str(kind)
    elif isinstance(result, dict):
        kind = result.get('media_kind')
        if not kind:
            nested = result.get('metadata')
            if isinstance(nested, dict):
                kind = nested.get('media_kind')
        if kind:
            return str(kind)
    return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
    """Collect stored metadata for *media_path*.

    Returns a tuple ``(sidecar_path, file_hash, tags, known_urls)``.  For
    files with a ``local`` origin the local-library database is consulted
    first (in which case ``sidecar_path`` is None); otherwise, or if the DB
    yields nothing, a sidecar file next to the media file is read.
    All failures are swallowed or logged — this is best-effort lookup.
    """
    # For local origin, try to read from local database first
    if origin and origin.lower() == "local" and config:
        try:
            from helper.local_library import LocalLibraryDB
            from config import get_local_storage_path
            try:
                db_root = get_local_storage_path(config)
            except Exception:
                db_root = None
            if db_root:
                try:
                    db = LocalLibraryDB(Path(db_root))
                    try:
                        # Get tags and metadata from database
                        tags = db.get_tags(media_path) or []
                        metadata = db.get_metadata(media_path) or {}
                        known_urls = metadata.get("known_urls") or []
                        file_hash = metadata.get("hash")
                        # Only short-circuit when the DB actually had something;
                        # otherwise fall through to the sidecar lookup below
                        if tags or known_urls or file_hash:
                            debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
                            return None, file_hash, tags, known_urls
                    finally:
                        db.close()
                except Exception as exc:
                    log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
        except Exception:
            # Import/config errors are non-fatal: sidecar lookup still applies
            pass
    # Fall back to sidecar file lookup
    try:
        sidecar_path = find_sidecar(media_path)
    except Exception:
        sidecar_path = None
    if not sidecar_path or not sidecar_path.exists():
        return None, None, [], []
    try:
        hash_value, tags, known_urls = read_sidecar(sidecar_path)
        return sidecar_path, hash_value, tags or [], known_urls or []
    except Exception as exc:
        # Sidecar exists but is unreadable: still report its path so callers
        # can clean it up later
        log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
        return sidecar_path, None, [], []
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
    """Pick the best available file hash.

    Precedence: hash carried on the result (PipeObject attribute or dict
    key), then *fallback_hash*, then a freshly computed SHA-256 of
    *file_path*.  Returns None only when computing the digest fails.
    """
    chosen: Optional[str] = None
    if isinstance(result, models.PipeObject):
        chosen = result.file_hash
    elif isinstance(result, dict):
        chosen = result.get('file_hash') or result.get('hash')
    if not chosen:
        chosen = fallback_hash
    if chosen:
        return str(chosen)
    try:
        return sha256_file(file_path)
    except Exception as exc:
        log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
        return None
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tags'),
media_path.parent / (media_path.name + '.tags.txt'),
]
targets.extend(extra_paths)
for target in targets:
if not target:
continue
try:
path_obj = Path(target)
if path_obj.exists():
path_obj.unlink()
except Exception:
continue
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: list[str],
    known_urls: list[str],
    file_hash: Optional[str],
    relationships: Optional[Dict[str, Any]],
    duration: Optional[float],
    media_kind: str,
) -> None:
    """Record metadata and tags for *dest_path* in the local-library DB.

    Best-effort: any failure is logged with a traceback and swallowed so the
    file transfer that preceded this call is not undone.
    """
    payload = {
        'hash': file_hash,
        'known_urls': known_urls,
        # NOTE(review): defaults to [] although the parameter is typed as a
        # dict — confirm which shape LocalLibraryDB.save_metadata expects
        'relationships': relationships or [],
        'duration': duration,
        'size': None,  # filled in below from stat(); stays None if stat fails
        'ext': dest_path.suffix.lower(),
        'media_type': media_kind,
        'media_kind': media_kind,
    }
    try:
        payload['size'] = dest_path.stat().st_size
    except OSError:
        payload['size'] = None
    try:
        debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
        db_path = Path(library_root) / ".downlow_library.db"
        debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
        debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
        debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
        with LocalLibraryDB(library_root) as db:
            # Save metadata FIRST to ensure file entry is created in DB
            if any(payload.values()):
                debug(f"[_persist_local_metadata] Saving metadata payload first")
                try:
                    db.save_metadata(dest_path, payload)
                    debug(f"[_persist_local_metadata] ✅ Metadata saved")
                except Exception as meta_exc:
                    # Re-raise so the outer handler logs a full traceback
                    log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
                    raise
            # Save tags to DB synchronously in same transaction
            # For local storage, DB is the primary source of truth
            if tags:
                try:
                    debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
                    db.save_tags(dest_path, tags)
                    debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
                except Exception as tag_exc:
                    log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
                    raise
            # NOTE: Sidecar files are intentionally NOT created for local storage
            # Local storage uses database as primary source, not sidecar files
            debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
    except Exception as exc:
        log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
        import traceback
        log(traceback.format_exc(), file=sys.stderr)
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
    """Transfer a file to local storage and return (exit_code, destination_path).

    Merges tags/URLs from the piped result with sidecar/DB metadata, moves
    the file via the 'local' storage backend, persists the merged metadata,
    and removes leftover sidecar files.

    Args:
        media_path: Path to source file
        destination_root: Destination directory
        result: Result object with metadata
        config: Configuration dictionary
    Returns:
        Tuple of (exit_code, destination_path)
        - exit_code: 0 on success, 1 on failure
        - destination_path: Path to moved file on success, None on failure
    """
    destination_root = destination_root.expanduser()
    try:
        destination_root.mkdir(parents=True, exist_ok=True)
    except Exception as exc:
        log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    tags_from_result = extract_tags_from_result(result)
    urls_from_result = extract_known_urls_from_result(result)
    # Get origin from result if available
    result_origin = None
    if hasattr(result, "origin"):
        result_origin = result.origin
    elif isinstance(result, dict):
        result_origin = result.get("origin") or result.get("source")
    sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
    # Normalize all title tags to use spaces instead of underscores BEFORE merging
    # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag
    tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    # Merge tags carefully: if URL has title tag, don't include sidecar title tags
    # This prevents duplicate title: tags when URL provides a title
    has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
    if has_url_title:
        # URL has a title, filter out any sidecar title tags to avoid duplication
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
        merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
    else:
        # No URL title, use all sidecar tags
        merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
    merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
    relationships = extract_relationships(result)
    duration = extract_duration(result)
    try:
        # move=True: the source file is relocated, not copied
        dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
    except Exception as exc:
        log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    dest_path = Path(dest_file)
    file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
    media_kind = _resolve_media_kind(result, dest_path)
    # Ensure only ONE title tag that matches the actual filename
    # Remove all existing title tags and add one based on the saved filename
    merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
    filename_title = dest_path.stem.replace("_", " ").strip()
    if filename_title:
        merged_tags_no_titles.insert(0, f"title:{filename_title}")
    _persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
    _cleanup_sidecar_files(media_path, sidecar_path)
    debug(f"✅ Moved to local library: {dest_path}")
    return 0, dest_path
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Upload/copy a file to specified location.

    Resolves a media path from the piped result (or -path), then routes to
    one of: a file provider (-provider), Hydrus, the configured local
    library, or an arbitrary directory (-storage).  List results and
    playlist items are handled by recursing into this function per item.

    Returns 0 on success, non-zero on failure.
    """
    import sys  # For stderr output (re-import shadows the module-level sys; harmless)
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    debug("Starting add-file cmdlet")
    # Handle list of results (from piped commands that emit multiple items)
    if isinstance(result, list):
        debug(f"Processing {len(result)} piped files")
        success_count = 0
        for item in result:
            exit_code = _run(item, _args, config)
            if exit_code == 0:
                success_count += 1
        # Overall success if at least one item was added
        return 0 if success_count > 0 else 1
    # Parse arguments using CMDLET spec
    parsed = parse_cmdlet_args(_args, CMDLET)
    location: Optional[str] = None
    provider_name: Optional[str] = None
    delete_after_upload = False
    # Check if -path argument was provided to use direct file path instead of piped result
    path_arg = parsed.get("path")
    if path_arg:
        # Create a pseudo-result object from the file path
        media_path = Path(str(path_arg).strip())
        if not media_path.exists():
            log(f"❌ File not found: {media_path}")
            return 1
        # Create result dict with the file path and origin 'wild' for direct path inputs
        result = {"target": str(media_path), "origin": "wild"}
        log(f"Using direct file path: {media_path}")
    # Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
    location = parsed.get("storage")
    if location:
        location = str(location).lower().strip()
    # Get file provider from parsed args
    provider_name = parsed.get("provider")
    if provider_name:
        provider_name = str(provider_name).lower().strip()
    # Check for delete flag (presence in parsed dict means it was provided)
    delete_after_upload = "delete" in parsed
    # Either storage or provider must be specified, but not both
    if location is None and provider_name is None:
        log("Either -storage or -provider must be specified")
        log(" -storage options: 'hydrus', 'local', or a directory path")
        log(" -provider options: '0x0'")
        return 1
    if location is not None and provider_name is not None:
        log("❌ Cannot specify both -storage and -provider")
        return 1
    # Validate location (storage backends)
    is_valid_location = False
    if location is not None:
        valid_locations = {'hydrus', 'local'}
        is_valid_location = location in valid_locations
    # A slash/backslash/drive-colon marks the value as a directory path
    is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
    if location is not None and not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    # Extract tags/known URLs from pipeline objects if available
    pipe_object_tags = extract_tags_from_result(result)
    if pipe_object_tags:
        log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
    pipe_known_urls = extract_known_urls_from_result(result)
    # Resolve media path: get from piped result
    # Support both object attributes (getattr) and dict keys (get)
    target = None
    origin = None
    # Try object attributes first
    if hasattr(result, "target"):
        target = result.target
    elif hasattr(result, "path"):
        target = result.path
    elif hasattr(result, "file_path"):
        target = result.file_path
    # Try dict keys if object attributes failed
    elif isinstance(result, dict):
        target = (result.get("target") or result.get("path") or result.get("file_path") or
                  result.get("__file_path") or result.get("__path") or result.get("__target"))
    # Get origin to detect Hydrus files
    if hasattr(result, "origin"):
        origin = result.origin
    elif hasattr(result, "source"):
        origin = result.source
    elif isinstance(result, dict):
        origin = result.get("origin") or result.get("source") or result.get("__source")
    # Convert target to string and preserve URLs (don't let Path() mangle them)
    target_str = str(target) if target else None
    # Check if this is a playlist item that needs to be downloaded first
    is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
    if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
        # This is a playlist item URL - we need to download it first
        log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
        # Extract item number if available
        item_num = None
        if "__action" in result and result["__action"].startswith("playlist-item:"):
            item_num = result["__action"].split(":")[1]
        elif "index" in result:
            item_num = result["index"]
        # Call download-data to download this specific item
        # Pass the item number so it knows which track to download
        from cmdlets import download_data as dl_module
        # Capture emissions from download-data to process them
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            # Also emit to original so user sees progress/output if needed
            # But since add-file is usually terminal, we might not need to
            # original_emit(obj)
        # Temporarily hook the pipeline emit function
        ctx.emit = capture_emit
        try:
            if item_num:
                # Pass a marker dict to tell download-data which item to get
                download_result = dl_module._run(
                    {
                        "__playlist_url": str(target_str),
                        "__playlist_item": int(item_num)
                    },
                    [],
                    config
                )
            else:
                # Fallback: just download the URL (will show all items)
                download_result = dl_module._run(None, [str(target_str)], config)
        finally:
            # Restore original emit function
            ctx.emit = original_emit
        if download_result != 0:
            log(f"❌ Failed to download playlist item", file=sys.stderr)
            return 1
        log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
        # Process the downloaded files recursively
        success_count = 0
        for res in captured_results:
            # Recursively call add-file with the downloaded result
            # This ensures tags and metadata from download-data are applied
            if _run(res, _args, config) == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Determine media_path from result
    media_path: Optional[Path] = None
    is_hydrus_file = origin and origin.lower() == "hydrus"
    if target_str:
        # Check if it's a URL or Hydrus hash
        if target_str.lower().startswith(("http://", "https://")):
            media_path = None  # Will handle as Hydrus file below
        elif not is_hydrus_file:
            # Only treat as local path if not a Hydrus file
            media_path = Path(target_str)
    if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
        # Check if this is a format object from download-data
        if isinstance(result, dict) and result.get('format_id') is not None:
            log("❌ Format object received, but add-file expects a downloaded file")
            log(f" Tip: Use @N to automatically select and download the format")
            log(f" Streamlined workflow:")
            log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
            log(f" (The @N automatically expands to download-data \"URL\" -item N)")
            return 1
        log("❌ File not found: provide a piped file result or local file path")
        return 1
    # Check if this is a Hydrus file - fetch the actual file path from Hydrus
    if is_hydrus_file and target_str:
        log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
        try:
            from helper import hydrus
            # Get the Hydrus client
            client = hydrus.get_client(config)
            if not client:
                log(f"❌ Hydrus client unavailable", file=sys.stderr)
                return 1
            # target_str is the hash - need to get the actual file path from Hydrus
            file_hash = target_str
            # Call the /get_files/file_path endpoint to get the actual file path
            response = client.get_file_path(file_hash)
            if not response or not isinstance(response, dict):
                log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
                return 1
            file_path_str = response.get("path")
            if not file_path_str:
                log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return 1
            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return 1
            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            import traceback
            log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
            return 1
    # Generic URL handler: if target is a URL and we haven't resolved a local path yet
    # This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
    if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
        log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
        from cmdlets import download_data as dl_module
        dl_args = []
        if location:
            dl_args.extend(["-storage", location])
        # Map provider 0x0 to storage 0x0 for download-data
        if provider_name == "0x0":
            dl_args.extend(["-storage", "0x0"])
        return dl_module._run(result, dl_args, config)
    if media_path is None:
        log("File path could not be resolved")
        return 1
    if not media_path.exists() or not media_path.is_file():
        log(f"File not found: {media_path}")
        return 1
    # Validate file type - only accept Hydrus-supported files
    file_extension = media_path.suffix.lower()
    if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
        log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
        log(f"Hydrus supports the following file types:", file=sys.stderr)
        # Display by category from hydrus_wrapper
        for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
            ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
            log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
        log(f"Skipping this file: {media_path.name}", file=sys.stderr)
        return 1
    # Handle based on provider or storage
    if provider_name is not None:
        # Use file provider (e.g., 0x0.st)
        from helper.search_provider import get_file_provider
        log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
        try:
            file_provider = get_file_provider(provider_name, config)
            if file_provider is None:
                log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
                return 1
            hoster_url = file_provider.upload(media_path)
            log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
            # Associate the URL with the file in Hydrus if possible.
            # file_hash is only bound when the Hydrus-origin branch above ran;
            # locals().get avoids a NameError in the common case
            current_hash = locals().get('file_hash')
            if not current_hash:
                current_hash = _resolve_file_hash(result, None, media_path)
            if current_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(current_hash, hoster_url)
                        log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
                except Exception as exc:
                    log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
        except Exception as exc:
            log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # Handle storage-based operations (location is not None here)
    valid_locations = {'hydrus', 'local'}
    is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
    if not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    if location == 'local':
        try:
            from config import get_local_storage_path
            resolved_dir = get_local_storage_path(config)
        except Exception:
            resolved_dir = None
        if not resolved_dir:
            resolved_dir = config.get("LocalDir") or config.get("OutputDir")
        if not resolved_dir:
            log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
            return 1
        log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
        # After successful local transfer, emit result for pipeline continuation
        # This allows downstream commands like add-tags to chain automatically
        if exit_code == 0 and dest_path:
            # Extract tags from result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract title from original result, fallback to filename if not available
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit result for local files, even if no tags
            # This allows @N selection and piping to downstream commands
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so downstream @N doesn't try to re-run download-data
            # Next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    elif is_local_path:
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1
        log(f"Moving to local path: {destination_root}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
        # After successful local transfer, emit result for pipeline continuation
        if exit_code == 0 and dest_path:
            # Extract tags from result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract title from original result, fallback to filename if not available
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit result for local files, even if no tags
            # This allows @N selection and piping to downstream commands
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so downstream @N doesn't try to re-run download-data
            # Next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    # location == 'hydrus'
    # Compute file hash to check if already in Hydrus
    log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
    log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
    try:
        file_hash = sha256_file(media_path)
    except Exception as exc:
        log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
        return 1
    log(f"File hash: {file_hash}", file=sys.stderr)
    # Read sidecar tags and known URLs first (for tagging)
    sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
    if sidecar_path:
        log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
        log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
        if sidecar_tags:
            log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
        if sidecar_urls:
            log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
    else:
        log(f"No sidecar found for {media_path.name}", file=sys.stderr)
    # Normalize all title tags to use spaces instead of underscores BEFORE merging
    # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    # NOTE(review): duplicates the helper inside _handle_local_transfer —
    # candidate for extraction to module level
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag
    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
    # Merge tags from PipeObject with tags from sidecar
    # NOTE: Remove ALL existing title tags and use only filename-based title
    # The filename is the source of truth for the title
    tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
                           if not str(t).strip().lower().startswith("title:")]
    # Ensure ONE title tag based on the actual filename
    filename_title = media_path.stem.replace("_", " ").strip()
    if filename_title:
        tags = [f"title:{filename_title}"] + tags_without_titles
    else:
        tags = tags_without_titles
    known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
    if pipe_object_tags:
        log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
    # Write metadata to file before uploading (only for local storage, not for Hydrus)
    # Hydrus stores tags separately, so we don't need to modify the file
    # NOTE(review): on this code path location == 'hydrus' always holds, so the
    # embed branch below appears unreachable — confirm before removing
    if location != 'hydrus':
        try:
            if tags:
                # Determine file kind from extension
                file_kind = ''
                sfx = media_path.suffix.lower()
                if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
                    file_kind = 'audio'
                elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
                    file_kind = 'video'
                if embed_metadata_in_file(media_path, tags, file_kind):
                    log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
                else:
                    log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
        except Exception as exc:
            log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
    else:
        log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
    # Use FileStorage backend to upload to Hydrus
    try:
        file_hash = storage["hydrus"].upload(
            media_path,
            config=config,
            tags=tags,
        )
        log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
    except Exception as exc:
        log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
        return 1
    # Associate known URLs in Hydrus metadata
    url_count = 0
    if known_urls:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                for url in known_urls:
                    u = str(url or "").strip()
                    if not u:
                        continue
                    try:
                        client.associate_url(file_hash, u)
                    except Exception as exc:
                        log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
                        continue
                    url_count += 1
        except Exception as exc:
            log(f"Failed to associate URLs: {exc}", file=sys.stderr)
    if url_count:
        log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
    else:
        log(f"No URLs to associate", file=sys.stderr)
    _cleanup_sidecar_files(media_path, sidecar_path)
    # Update in-memory result for downstream pipes
    try:
        # Only update piped result objects; direct -path usage may have a dummy result
        setattr(result, "hash_hex", file_hash)
        # Preserve media_kind for downstream commands (e.g., open)
        if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
            # Try to infer media_kind from file extension or keep existing
            suffix = media_path.suffix.lower()
            if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
                setattr(result, "media_kind", "document")
        if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
            cols = list(getattr(result, "columns"))
            if ("Hash", file_hash) not in cols:
                cols.append(("Hash", file_hash))
            setattr(result, "columns", cols)
    except Exception:
        # setattr on a dict result raises; intentionally ignored
        pass
    # If -delete flag is set, delete the file and .tags after successful upload
    if delete_after_upload:
        log(f"Deleting local files (as requested)...", file=sys.stderr)
        try:
            media_path.unlink()
            log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
        except OSError as exc:
            log(f"Failed to delete file: {exc}", file=sys.stderr)
        # Delete .tags sidecar if it exists
        if sidecar_path is not None:
            try:
                sidecar_path.unlink()
                log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
            except OSError as exc:
                log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
    log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
    # Emit result for Hydrus uploads so downstream commands know about it
    if location == 'hydrus':
        # Extract title from original result, fallback to filename if not available
        result_title = extract_title_from_result(result) or media_path.name
        result_dict = create_pipe_object_result(
            source='hydrus',
            identifier=file_hash,
            file_path=f"hydrus:{file_hash}",
            cmdlet_name='add-file',
            title=result_title,
            file_hash=file_hash,
            extra={
                'storage_source': 'hydrus',
                'hydrus_hash': file_hash,
                'tags': tags,
                'known_urls': known_urls,
            }
        )
        ctx.emit(result_dict)
        # Clear the stage table so downstream @N doesn't try to re-run download-data
        # Next stage will use these Hydrus file results, not format objects
        ctx.set_current_stage_table(None)
    return 0
# Cmdlet registration spec; also serialized verbatim as the --help output
CMDLET = Cmdlet(
    name="add-file",
    summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
    usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
    args=[
        CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
        SharedArgs.STORAGE,  # For hydrus, local, or directory paths
        CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
        CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
    ],
    details=[
        "- Storage location options (use -storage):",
        " hydrus: Upload to Hydrus database with metadata tagging",
        " local: Copy file to local directory",
        " <path>: Copy file to specified directory",
        "- File provider options (use -provider):",
        " 0x0: Upload to 0x0.st for temporary hosting with public URL",
        "- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
        "- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
        "- When using file provider: uploads to service, adds URL to sidecar",
        "- When copying locally: copies file with original metadata preserved",
        "- Use -delete flag to automatically delete the file and .tags after successful operation.",
    ],
)