"""Download data from URLs using yt-dlp with playlist, clipping, and format selection.
This is a merged implementation combining:
- cmdlets/download_data.py (pipeline wrapper)
- funact/download_data.py (feature-rich implementation)
- helper/download.py (low-level machinery)
Features:
- Direct file downloads and yt-dlp streaming sites
- Playlist detection with interactive track selection
- Clip extraction (time ranges like 34:03-35:08)
- Format selection and audio/video toggles
- Cookies file support
- Tag extraction and metadata integration
- Progress tracking and debug logging
- Pipeline integration with result emission
- Background torrent/magnet downloads via AllDebrid
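Example invocations (illustrative sketches; flag spellings follow the argument parser in
_run, and the @N pipe selection mirrors the hints logged elsewhere in this module):
download-data https://example.com/video -clip 34:03-35:08 -storage <location>
search-file -provider libgen 'isbn:"<ISBN>"' | @1 | download-data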
"""
from __future__ import annotations
import hashlib
import re
import sys
import threading
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
import uuid
from helper.logger import log, debug
from helper.download import download_media, probe_url
from helper.utils import sha256_file
from models import DownloadOptions
from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, parse_cmdlet_args
import models
import pipeline as pipeline_context
from config import resolve_output_dir
from metadata import (
fetch_openlibrary_metadata_tags,
format_playlist_entry,
extract_ytdlp_tags
)
# ============================================================================
# Try to import optional dependencies
# ============================================================================
try:
from yt_dlp.utils import sanitize_filename as ytdlp_sanitize_filename # type: ignore
except Exception: # pragma: no cover - optional dependency
ytdlp_sanitize_filename = None
# ============================================================================
# Background Worker for AllDebrid Downloads
# ============================================================================
def _download_torrent_worker(
worker_id: str,
magnet_url: str,
output_dir: Path,
config: Dict[str, Any],
api_key: str,
playlist_items: Optional[str] = None,
audio_mode: bool = False,
wait_timeout: int = 600,
worker_manager: Optional[Any] = None,
) -> None:
"""Background worker to download torrent/magnet via AllDebrid.
Runs in a separate thread and updates worker_manager with progress.
Args:
worker_id: Unique ID for this worker task
magnet_url: Magnet link or .torrent URL to download
output_dir: Directory to save downloaded files
config: Configuration dict
api_key: AllDebrid API key
playlist_items: Optional file selection (e.g., "1,3,5-8")
audio_mode: Whether to tag as audio or video
wait_timeout: Timeout in seconds for magnet processing
worker_manager: WorkerManager instance for progress updates
"""
worker = None
downloaded_files = []
try:
from helper.alldebrid import AllDebridClient
# Get worker reference if manager provided
if worker_manager:
try:
workers = worker_manager.get_active_workers()
worker = next((w for w in workers if w.get('id') == worker_id), None)
except:
worker = None
def log_progress(message: str) -> None:
"""Log progress to both console and worker manager."""
debug(message)
if worker_manager and worker_id:
try:
worker_manager.log_step(worker_id, message)
except:
pass
log_progress(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
client = AllDebridClient(api_key)
# Add magnet
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
if magnet_id <= 0:
log_progress(f"[Worker {worker_id}] ✗ Failed to add magnet to AllDebrid")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", f"Failed to add magnet")
except:
pass
return
log_progress(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
# Poll for ready status
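# AllDebrid statusCode semantics as handled below: 4 means the files are ready,
# >= 5 maps to the error table further down, and anything lower is still processing.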
elapsed = 0
last_status_reported = 0
while elapsed < wait_timeout:
try:
status_info = client.magnet_status(magnet_id)
except Exception as e:
log_progress(f"[Worker {worker_id}] ⚠ Failed to get status: {e}")
time.sleep(2)
elapsed += 2
continue
status_code = status_info.get('statusCode', -1)
status_text = status_info.get('status', 'Unknown')
# Report progress every 5 seconds (avoid log spam)
if elapsed - last_status_reported >= 5 or elapsed < 2:
downloaded = status_info.get('downloaded', 0)
total_size = status_info.get('size', 0)
seeders = status_info.get('seeders', 0)
speed = status_info.get('downloadSpeed', 0)
if total_size > 0:
percent = (downloaded / total_size) * 100
speed_str = f" @ {speed / (1024**2):.1f} MB/s" if speed > 0 else ""
seeders_str = f" ({seeders} seeders)" if seeders > 0 else ""
progress_msg = f"[Worker {worker_id}] ⧗ {status_text}: {percent:.1f}% ({downloaded / (1024**3):.2f} / {total_size / (1024**3):.2f} GB){speed_str}{seeders_str}"
log_progress(progress_msg)
# Update worker with progress
if worker_manager:
try:
worker_manager.update_worker(
worker_id,
status="running",
progress=f"{percent:.1f}%",
details=progress_msg
)
except:
pass
else:
log_progress(f"[Worker {worker_id}] ⧗ {status_text}...")
last_status_reported = elapsed
if status_code == 4: # Ready
log_progress(f"[Worker {worker_id}] ✓ Files ready")
break
elif status_code >= 5: # Error
error_status = {
5: "Upload failed",
6: "Internal error during unpacking",
7: "Not downloaded in 20 minutes",
8: "File too big (>1TB)",
9: "Internal error",
10: "Download took >72 hours",
11: "Deleted on hoster website",
12: "Processing failed",
13: "Processing failed",
14: "Tracker error",
15: "No peers available"
}
error_msg = error_status.get(status_code, f"Unknown error {status_code}")
log_progress(f"[Worker {worker_id}] ✗ Magnet failed: {error_msg}")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", error_msg)
except:
pass
return
time.sleep(2)
elapsed += 2
if elapsed >= wait_timeout:
log_progress(f"[Worker {worker_id}] ✗ Timeout waiting for magnet (>{wait_timeout}s)")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", f"Timeout after {wait_timeout}s")
except:
pass
return
# Get files
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
if not magnet_files:
# Some responses key the mapping by the integer id rather than its string form.
magnet_files = files_result.get(magnet_id, {})
files_array = magnet_files.get('files', [])
if not files_array:
log_progress(f"[Worker {worker_id}] ✗ No files found in magnet")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", "No files found in magnet")
except:
pass
return
log_progress(f"[Worker {worker_id}] ✓ Found {len(files_array)} file(s)")
# Extract download links
download_links = []
def extract_links(items, prefix=""):
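# AllDebrid file-tree nodes use short keys: 'n' = name, 'l' = download link,
# 's' = size, 'e' = nested child entries (sub-folders).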
if not isinstance(items, list):
return
for item in items:
if isinstance(item, dict):
name = item.get('n', '')
link = item.get('l', '')
size = item.get('s', 0)
entries = item.get('e', [])
if link:
download_links.append({
'link': link,
'name': name,
'size': size,
'path': f"{prefix}/{name}" if prefix else name
})
if entries:
extract_links(entries, f"{prefix}/{name}" if prefix else name)
extract_links(files_array)
if not download_links:
log_progress(f"[Worker {worker_id}] ✗ No downloadable files found")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", "No downloadable files")
except:
pass
return
# Filter by playlist_items if specified
if playlist_items and playlist_items != '*':
# Parse selection like "1,3,5-8"
selected_indices = []
for part in playlist_items.split(','):
part = part.strip()
if '-' in part:
start, end = part.split('-')
selected_indices.extend(range(int(start)-1, int(end)))
else:
selected_indices.append(int(part)-1)
download_links = [download_links[i] for i in selected_indices if i < len(download_links)]
log_progress(f"[Worker {worker_id}] Downloading {len(download_links)} selected file(s)")
# Download each file
for idx, file_info in enumerate(download_links, 1):
link = file_info['link']
name = file_info['name']
log_progress(f"[Worker {worker_id}] ({idx}/{len(download_links)}) Downloading: {name}")
try:
# Unlock the link
try:
actual_link = client.unlock_link(link)
if actual_link and actual_link != link:
link = actual_link
except:
pass
# Download via HTTP
from helper.http_client import HTTPClient
output_dir.mkdir(parents=True, exist_ok=True)
file_path = output_dir / name
file_path.parent.mkdir(parents=True, exist_ok=True)
with HTTPClient() as http_client:
http_client.download(link, str(file_path))
log_progress(f"[Worker {worker_id}] ✓ Downloaded: {name}")
# Compute hash and emit result
file_hash = _compute_file_hash(file_path)
result_obj = {
'file_path': str(file_path),
'source_url': magnet_url,
'file_hash': file_hash,
'media_kind': 'audio' if audio_mode else 'video',
}
pipeline_context.emit(result_obj)
downloaded_files.append(file_path)
except Exception as e:
log_progress(f"[Worker {worker_id}] ⚠ Failed to download {name}: {e}")
if downloaded_files:
msg = f"✓ Torrent download complete ({len(downloaded_files)} file(s))"
log_progress(f"[Worker {worker_id}] {msg}")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "success", msg)
except:
pass
else:
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", "No files downloaded")
except:
pass
except ImportError:
log_progress(f"[Worker {worker_id}] ✗ AllDebrid client not available")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", "AllDebrid client not available")
except:
pass
except Exception as e:
import traceback
log_progress(f"[Worker {worker_id}] ✗ Torrent download failed: {e}")
if worker_manager:
try:
worker_manager.finish_worker(worker_id, "failed", str(e))
except:
pass
traceback.print_exc(file=sys.stderr)
# ============================================================================
# Torrent File Parsing
# ============================================================================
def _parse_torrent_file(file_path: str) -> Optional[str]:
"""Parse a .torrent file and extract magnet link.
Args:
file_path: Path to .torrent file
Returns:
Magnet link string or None if parsing fails
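The link is assembled below in the standard form
magnet:?xt=urn:btih:<hex SHA1 of the bencoded info dict>&dn=<name>[&tr=<tracker>...]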
"""
try:
import bencode3
except ImportError:
log("⚠ bencode3 module not found. Install: pip install bencode3", file=sys.stderr)
return None
try:
with open(file_path, 'rb') as f:
torrent_data = bencode3.bdecode(f.read())
except Exception as e:
log(f"✗ Failed to parse torrent file: {e}", file=sys.stderr)
return None
try:
# Get info dict - bencode3 returns string keys, not bytes
info = torrent_data.get('info')
if not info:
log("✗ No info dict in torrent file", file=sys.stderr)
return None
# Calculate info hash (SHA1 of bencoded info dict)
import hashlib
info_hash = hashlib.sha1(bencode3.bencode(info)).hexdigest()
# Get name
name = info.get('name', 'Unknown')
if isinstance(name, bytes):
name = name.decode('utf-8', errors='ignore')
# Create magnet link
magnet = f"magnet:?xt=urn:btih:{info_hash}&dn={name}"
# Add trackers if available
announce = torrent_data.get('announce')
if announce:
try:
tracker = announce if isinstance(announce, str) else announce.decode('utf-8', errors='ignore')
magnet += f"&tr={tracker}"
except:
pass
announce_list = torrent_data.get('announce-list', [])
for tier in announce_list:
if isinstance(tier, list):
for tracker_item in tier:
try:
tracker = tracker_item if isinstance(tracker_item, str) else tracker_item.decode('utf-8', errors='ignore')
if tracker:
magnet += f"&tr={tracker}"
except:
pass
debug(f"✓ Parsed torrent: {name} (hash: {info_hash})")
return magnet
except Exception as e:
log(f"✗ Error parsing torrent metadata: {e}", file=sys.stderr)
return None
def _download_torrent_file(url: str, temp_dir: Optional[Path] = None) -> Optional[str]:
"""Download a .torrent file from URL and parse it.
Args:
url: URL to .torrent file
temp_dir: Optional temp directory for storing downloaded file
Returns:
Magnet link string or None if download/parsing fails
"""
try:
from helper.http_client import HTTPClient
except ImportError:
log("⚠ HTTPClient not available", file=sys.stderr)
return None
try:
# Download torrent file
debug(f"⇓ Downloading torrent file: {url}")
with HTTPClient(timeout=30.0) as client:
response = client.get(url)
response.raise_for_status()
torrent_data = response.content
# Create temp file
if temp_dir is None:
temp_dir = Path.home() / ".cache" / "downlow"
temp_dir.mkdir(parents=True, exist_ok=True)
# Save to temp file
import hashlib
url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
temp_file = temp_dir / f"torrent_{url_hash}.torrent"
temp_file.write_bytes(torrent_data)
debug(f"✓ Downloaded torrent file: {temp_file}")
# Parse it
magnet = _parse_torrent_file(str(temp_file))
# Clean up
try:
temp_file.unlink()
except:
pass
return magnet
except Exception as e:
log(f"✗ Failed to download/parse torrent: {e}", file=sys.stderr)
return None
def _is_torrent_file_or_url(arg: str) -> bool:
"""Check if argument is a .torrent file path or URL.
Args:
arg: Argument to check
Returns:
True if it's a .torrent file or URL
"""
arg_lower = arg.lower()
# Check if it's a .torrent file path
if arg_lower.endswith('.torrent'):
return Path(arg).exists() or arg_lower.startswith('http')
# Check if it's a URL to .torrent file
if arg_lower.startswith('http://') or arg_lower.startswith('https://'):
return '.torrent' in arg_lower
return False
def _process_torrent_input(arg: str) -> Optional[str]:
"""Process torrent file or URL and convert to magnet link.
Args:
arg: .torrent file path or URL
Returns:
Magnet link or original argument if not processable
"""
try:
if arg.lower().startswith('http://') or arg.lower().startswith('https://'):
# It's a URL
return _download_torrent_file(arg) or arg
else:
# It's a file path
if Path(arg).exists():
return _parse_torrent_file(arg) or arg
else:
return arg
except Exception as e:
log(f"⚠ Error processing torrent: {e}", file=sys.stderr)
return arg
# ============================================================================
# Helper Functions
# ============================================================================
def _show_playlist_table(url: str, probe_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Show playlist result table and get user selection.
Args:
url: Original URL
probe_info: Info dict from probe_url()
Returns:
Modified probe_info with selected_entries, or None if user cancelled
"""
entries = probe_info.get("entries", [])
if not entries:
return probe_info
extractor = probe_info.get("extractor", "")
title = probe_info.get("title", "Playlist")
debug(f"📋 Detected playlist: {title} ({len(entries)} items) - {extractor}")
# Skip full metadata enrichment for speed - extract_flat usually provides enough info
# debug("📋 Fetching metadata for each item...")
# entries = enrich_playlist_entries(entries, extractor)
# Emit each playlist item as a separate result row
for i, entry in enumerate(entries, 1):
formatted = format_playlist_entry(entry, i, extractor)
# Build tags from available metadata
tags = []
artist = formatted.get("artist") or formatted.get("uploader", "")
if artist:
tags.append(artist)
album = formatted.get("album", "")
if album and album != title: # Don't repeat playlist title
tags.append(album)
# Extract individual fields for separate columns
duration = formatted.get("duration", 0)
duration_str = ""
if duration:
minutes = int(duration // 60)
seconds = int(duration % 60)
duration_str = f"{minutes}m{seconds}s"
tags.append(duration_str)
# Normalize extractor for comparison (remove special chars and case)
ext_lower = extractor.lower().replace(":", "").replace(" ", "")
track_number = None
# Add site-specific tags and fields
if "youtube" in ext_lower and formatted.get("channel"):
tags.append(f"channel:{formatted.get('channel')}")
elif "bandcamp" in ext_lower:
track_number = formatted.get("track_number", i)
tags.append(f"track:{track_number}")
# Create result row with separate columns for important metadata
# Build columns dynamically based on available data
columns = [
("#", i),
("Title", formatted["title"]),
]
# Add Artist column if available
if artist:
columns.append(("Artist", artist))
# Add Duration column if available
if duration_str:
columns.append(("Duration", duration_str))
# Add Track number column for music platforms
if track_number is not None:
columns.append(("Track", str(track_number)))
# Add Tags column for remaining tags (if any)
remaining_tags = [t for t in tags if t not in [artist, duration_str]]
if remaining_tags:
columns.append(("Tags", ", ".join(remaining_tags)))
# Create result row with compact columns display
# Using "columns" field tells ResultTable which columns to show
result_row = {
"title": formatted["title"],
"tags": tags,
"index": i,
# Store all metadata but don't display in table (use columns field)
"__source": "playlist-probe",
"__id": f"{i}",
"__file_path": url,
"__action": f"playlist-item:{i}",
"__artist": formatted.get("artist", ""),
"__duration": formatted.get("duration", 0),
"__extractor": extractor,
# Define which columns should be shown in the result table
"columns": columns
}
# Add site-specific metadata for pipeline use
if "youtube" in ext_lower:
result_row["__video_id"] = formatted.get("video_id", "")
result_row["__channel"] = formatted.get("channel", "")
elif "bandcamp" in ext_lower:
result_row["__track_number"] = formatted.get("track_number", i)
result_row["__album"] = formatted.get("album") or title
elif "spotify" in ext_lower:
result_row["__artists"] = formatted.get("artists", "")
result_row["__album"] = formatted.get("album", "")
pipeline_context.emit(result_row)
debug(f" Playlist items displayed. Use result table references (@1, @2, etc.) to select tracks.")
# Return modified probe info
return probe_info
def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]:
"""Parse time range from MM:SS-MM:SS or seconds format.
Args:
clip_spec: Time range string like "34:03-35:08" or "2043-2108"
Returns:
Tuple of (start_seconds, end_seconds) or None if invalid
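Example:
"34:03-35:08" -> (2043, 2108); "2043-2108" -> (2043, 2108)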
"""
try:
if '-' not in clip_spec:
return None
parts = clip_spec.split('-')
if len(parts) != 2:
return None
start_str, end_str = parts
# Try MM:SS format first
if ':' in start_str:
start_parts = start_str.split(':')
if len(start_parts) == 2:
start_sec = int(start_parts[0]) * 60 + int(start_parts[1])
else:
return None
else:
start_sec = int(start_str)
if ':' in end_str:
end_parts = end_str.split(':')
if len(end_parts) == 2:
end_sec = int(end_parts[0]) * 60 + int(end_parts[1])
else:
return None
else:
end_sec = int(end_str)
if start_sec >= end_sec:
return None
return (start_sec, end_sec)
except (ValueError, AttributeError):
return None
MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'}
def _parse_playlist_selection_indices(selection: Optional[str], total_items: int) -> list[int]:
"""Convert playlist selection string to 0-based indices."""
if total_items <= 0:
return []
if not selection or selection.strip() in {"*", ""}:
return list(range(total_items))
indices: list[int] = []
for part in selection.split(','):
part = part.strip()
if not part:
continue
if '-' in part:
bounds = part.split('-', 1)
try:
start = int(bounds[0])
end = int(bounds[1])
except ValueError:
continue
if start <= 0 or end <= 0:
continue
if start > end:
start, end = end, start
for idx in range(start - 1, end):
if 0 <= idx < total_items:
indices.append(idx)
else:
try:
idx = int(part) - 1
except ValueError:
continue
if 0 <= idx < total_items:
indices.append(idx)
seen: set[int] = set()
ordered: list[int] = []
for idx in indices:
if idx not in seen:
ordered.append(idx)
seen.add(idx)
return ordered
def _select_playlist_entries(entries: Any, selection: Optional[str]) -> list[Dict[str, Any]]:
"""Pick playlist entries according to a selection string."""
if not isinstance(entries, list):
return []
indices = _parse_playlist_selection_indices(selection, len(entries))
if not indices:
return []
selected: list[Dict[str, Any]] = []
for idx in indices:
entry = entries[idx]
if isinstance(entry, dict):
selected.append(entry)
return selected
def _sanitize_title_for_filename(title: Optional[str]) -> str:
"""Match yt-dlp's restricted filename sanitization for comparisons."""
if not title:
return ""
if ytdlp_sanitize_filename:
try:
return ytdlp_sanitize_filename(title, restricted=True)
except Exception:
pass
sanitized = re.sub(r"[^0-9A-Za-z._-]+", "_", title)
return sanitized.strip() or ""
def _find_playlist_files_from_entries(
entries: Sequence[Dict[str, Any]],
output_dir: Path,
) -> list[Path]:
"""Resolve expected playlist files based on entry titles/exts."""
matched: list[Path] = []
seen: set[str] = set()
for entry in entries:
title = entry.get('title') if isinstance(entry, dict) else None
sanitized = _sanitize_title_for_filename(title)
if not sanitized:
continue
preferred_exts: list[str] = []
for key in ('ext', 'audio_ext', 'video_ext'):
value = entry.get(key) if isinstance(entry, dict) else None
if isinstance(value, str) and value:
preferred_exts.append(value.lower())
if not preferred_exts:
preferred_exts = [ext.strip('.') for ext in MEDIA_EXTENSIONS]
candidate: Optional[Path] = None
for ext in preferred_exts:
ext = ext.lstrip('.').lower()
path = output_dir / f"{sanitized}.{ext}"
if path.exists():
candidate = path
break
if candidate is None:
try:
# Bandcamp/yt-dlp often prefixes uploader info, so fall back to a substring match.
for f in output_dir.glob(f"*{sanitized}*"):
if f.suffix.lower() in MEDIA_EXTENSIONS and f.is_file():
candidate = f
break
except OSError:
candidate = None
if candidate and str(candidate) not in seen:
matched.append(candidate)
seen.add(str(candidate))
return matched
def _snapshot_playlist_paths(
entries: Sequence[Dict[str, Any]],
output_dir: Path,
) -> tuple[list[Path], set[str]]:
"""Capture current playlist file paths for a given selection."""
matches = _find_playlist_files_from_entries(entries, output_dir)
resolved: set[str] = set()
for path in matches:
try:
resolved.add(str(path.resolve()))
except OSError:
resolved.add(str(path))
return matches, resolved
def _expand_playlist_selection(selection: str, num_items: int) -> str:
"""Expand playlist selection string, handling wildcards.
Args:
selection: Selection string like '1,3,5-8' or '*'
num_items: Total number of items in playlist
Returns:
Expanded selection string like '1,3,5,6,7,8' or '1-18' for '*'
"""
if selection.strip() == "*":
# Wildcard: select all items
return f"1-{num_items}"
# Return as-is if not wildcard (yt-dlp will handle ranges and lists)
return selection
def _parse_selection_string(selection: str) -> List[int]:
"""Parse selection string into list of integers.
Handles formats like:
- "2" -> [2]
- "1,3,5" -> [1, 3, 5]
- "1-3" -> [1, 2, 3]
- "1,3-5,7" -> [1, 3, 4, 5, 7]
Args:
selection: Selection string
Returns:
List of integer indices
"""
result = []
for part in selection.split(','):
part = part.strip()
if '-' in part:
# Range like "3-5"
try:
start, end = part.split('-')
start_num = int(start.strip())
end_num = int(end.strip())
result.extend(range(start_num, end_num + 1))
except (ValueError, AttributeError):
continue
else:
# Single number
try:
result.append(int(part))
except ValueError:
continue
return result
def _filter_and_sort_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Filter and sort formats for user selection.
Filters out:
- Storyboards (webp, svg formats)
- Low quality audio (total bitrate below ~64 kbps)
- Video-only streams below 360p
Sorts to prioritize:
- @1: Best combined audio+video (highest resolution, highest bitrate)
- @2: Best audio-only (highest bitrate audio)
- Then rest by quality
Args:
formats: List of format dicts from yt-dlp
Returns:
Filtered and sorted format list
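For example, storyboard webp/svg entries, video-only streams under 360p, and audio-only
streams under ~64 kbps are dropped, so a typical listing offers the best muxed
audio+video format at @1 and the best audio-only format at @2.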
"""
filtered = []
for fmt in formats:
format_id = fmt.get("format_id", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "")
acodec = fmt.get("acodec", "")
height = fmt.get("height")
tbr = fmt.get("tbr") # Total bitrate
# Skip storyboards (webp images, svg, etc.)
if ext in {"webp", "svg", "mhtml"}:
continue
# Skip video-only formats below 360p
if vcodec != "none" and acodec == "none":
if height and height < 360:
continue
# Skip low-bitrate audio (typically 48kHz, very low quality)
# Keep audio with tbr >= 64 kbps (reasonable quality threshold)
if acodec != "none" and vcodec == "none":
if tbr and tbr < 64:
continue
filtered.append(fmt)
# Sort formats: best combined first, then best audio-only, then video-only
def format_sort_key(fmt: Dict[str, Any]) -> tuple:
vcodec = fmt.get("vcodec", "")
acodec = fmt.get("acodec", "")
height = fmt.get("height", 0) or 0
tbr = fmt.get("tbr", 0) or 0
# Category 0: has both audio and video (sort first)
# Category 1: audio only (sort second)
# Category 2: video only (sort last, by height desc)
if vcodec != "none" and acodec != "none":
category = 0
return (category, -height, -tbr)
elif acodec != "none" and vcodec == "none":
category = 1
return (category, -tbr) # Sort by bitrate descending
else: # Video only
category = 2
return (category, -height, -tbr) # Sort by height descending, then bitrate
return sorted(filtered, key=format_sort_key)
def _compute_file_hash(file_path: Path) -> Optional[str]:
"""Compute SHA256 hash of file."""
try:
return sha256_file(file_path)
except Exception:
return None
# ============================================================================
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int:
"""Download data from URLs with advanced options.
Accepts:
- Single URL as string
- Result object with 'url' or 'file_path' field
- List of results
- File containing URLs (one per line)
Returns:
Exit code (0 for success, 1 for failure)
"""
debug("Starting download-data")
collected_results: List[Dict[str, Any]] = []
def _emit(obj: Any) -> None:
"""Internal helper to collect and optionally emit results."""
collected_results.append(obj)
if emit_results:
pipeline_context.emit(obj)
# Track pipeline mode once so playlist handling can respect current run scope
stage_ctx = pipeline_context.get_stage_context()
in_pipeline = stage_ctx is not None and getattr(stage_ctx, 'total_stages', 1) > 1
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
# Parse arguments using shared parser
parsed = parse_cmdlet_args(args, CMDLET)
audio_mode = parsed.get("audio", False)
format_selector = parsed.get("format")
list_formats_mode = parsed.get("list-formats", False)
clip_spec = parsed.get("clip")
clip_range = None
if clip_spec:
clip_range = _parse_time_range(clip_spec)
if clip_range:
debug(f"Clip range: {clip_spec} ({clip_range[0]}-{clip_range[1]} seconds)")
else:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1
cookies_path = parsed.get("cookies")
storage_location = parsed.get("storage")
torrent_mode = parsed.get("torrent", False)
wait_timeout = float(parsed.get("wait", 1800))
# Collect URLs from positional args and -url flag
# Both map to "url" in parsed result
urls_to_download = []
raw_urls = parsed.get("url", [])
if isinstance(raw_urls, str):
raw_urls = [raw_urls]
for arg in raw_urls:
if arg.lower().startswith(('http://', 'https://')):
# Check if it's a .torrent URL or file first
if '.torrent' in arg.lower():
debug(f"Processing torrent URL: {arg}")
magnet = _process_torrent_input(arg)
if magnet and magnet.lower().startswith('magnet:'):
urls_to_download.append(magnet)
debug(f"✓ Converted to magnet: {magnet[:70]}...")
elif magnet:
urls_to_download.append(magnet)
else:
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
else:
urls_to_download.append(arg)
elif torrent_mode and (arg.lower().startswith('magnet:') or len(arg) == 40 or len(arg) == 64):
# In torrent mode, accept magnet links or torrent hashes (40-char SHA1 or 64-char SHA256)
urls_to_download.append(arg)
debug(f"Torrent/magnet added: {arg[:50]}...")
elif _is_torrent_file_or_url(arg):
# Handle .torrent files and URLs
log(f"Processing torrent file/URL: {arg}", flush=True)
magnet = _process_torrent_input(arg)
if magnet and magnet.lower().startswith('magnet:'):
urls_to_download.append(magnet)
log(f"✓ Converted to magnet: {magnet[:70]}...", flush=True)
elif magnet:
urls_to_download.append(magnet)
else:
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
else:
# Treat as URL if it looks like one
if arg.lower().startswith(('magnet:', 'ftp://')):
urls_to_download.append(arg)
else:
# Check if it's a file containing URLs
path = Path(arg)
if path.exists() and path.is_file():
try:
with open(arg, 'r') as f:
for line in f:
line = line.strip()
if line and line.lower().startswith(('http://', 'https://')):
urls_to_download.append(line)
log(f"Loaded URLs from file: {arg}", flush=True)
except Exception as e:
log(f"Error reading file {arg}: {e}", file=sys.stderr)
else:
log(f"Ignored argument: {arg}", file=sys.stderr)
# Item selection (for playlists/formats)
# Note: -item flag is deprecated in favor of @N pipeline selection, but kept for compatibility
playlist_items = parsed.get("item")
if playlist_items:
log(f"Item selection: {playlist_items}", flush=True)
def _is_openlibrary_downloadable(ebook_access_val: Any, status_val: Any) -> bool:
"""Return True when OpenLibrary metadata marks the book as borrowable/downloadable."""
access = str(ebook_access_val or "").strip().lower()
status = str(status_val or "").strip().lower()
if status == "download":
return True
if access in {"borrowable", "public", "full", "open"} or access.startswith("full "):
return True
return False
# ========================================================================
# INPUT PROCESSING - Extract URLs from pipeline or arguments
# ========================================================================
# Initialize worker tracking for downloads
import uuid
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
db = None
if library_root:
try:
db = LocalLibraryDB(library_root)
db.insert_worker(
worker_id,
"download",
title="Download Data",
description="Downloading files from search results",
pipe=pipeline_context.get_current_command_text()
)
except Exception as e:
log(f"⚠ Worker tracking unavailable: {e}", file=sys.stderr)
piped_results = normalize_result_input(result)
# Track files downloaded directly (e.g. Soulseek) to avoid "No URLs" error
files_downloaded_directly = 0
# Only process piped results if no URLs were provided in arguments
# This prevents picking up residue from previous commands when running standalone
if piped_results and not urls_to_download:
for item in piped_results:
url = None
origin = None
# ====== CHECK FOR PLAYLIST ITEM MARKER FROM add-file ======
# When add-file detects a playlist item and wants to download it
if isinstance(item, dict) and item.get('__playlist_url'):
playlist_url = item.get('__playlist_url')
item_num = item.get('__playlist_item', 1)
log(f"📍 Playlist item from add-file: #{item_num}", flush=True)
# Add to download list with marker
urls_to_download.append({
'__playlist_url': playlist_url,
'__playlist_item': int(item_num)
})
continue
# ====== CHECK FOR PLAYLIST ITEM SELECTION FIRST ======
# When user selects @12 from a playlist, item is emitted dict with __action: "playlist-item:12"
if isinstance(item, dict) and '__action' in item and item['__action'].startswith('playlist-item:'):
playlist_url = item.get('__file_path')
playlist_action = item['__action'] # e.g., "playlist-item:12"
item_num = playlist_action.split(':')[1] # Extract item number (1-based)
if playlist_url:
# Playlist item selected - need to download this specific track
log(f"📍 Playlist item selected: #{item_num} - {item.get('title', 'Unknown')}", flush=True)
# Add to download list - the playlist will be probed and item extracted
# Store with special marker so we know which item to select
urls_to_download.append({
'__playlist_url': playlist_url,
'__playlist_item': int(item_num)
})
continue
# ====== CHECK FOR FORMAT SELECTION RESULT ======
if isinstance(item, dict) and item.get('format_id') is not None and item.get('source_url'):
log(f"🎬 Format selected from pipe: {item.get('format_id')}", flush=True)
log(f" Source URL: {item.get('source_url')}", flush=True)
# Store as dict so we can extract format_id + source_url during download
urls_to_download.append(item)
continue
elif hasattr(item, 'format_id') and hasattr(item, 'source_url') and item.format_id is not None:
log(f"🎬 Format selected from pipe: {item.format_id}", flush=True)
log(f" Source URL: {item.source_url}", flush=True)
urls_to_download.append({
'format_id': item.format_id,
'source_url': item.source_url,
})
continue
if isinstance(item, dict):
# Check for search provider results first
origin = item.get('origin')
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
# Handle search provider results
title = item.get('title', 'Item')
if origin == 'openlibrary':
# OpenLibrary: First check if lendable/downloadable via Archive.org
# Only route to LibGen if NOT available on Archive.org
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
isbn = metadata.get('isbn') or item.get('isbn')
olid = metadata.get('olid') or item.get('olid')
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
if isbn:
log(f" ISBN: {isbn}", flush=True)
# Check if book is borrowable from ebook_access field or status
ebook_access = metadata.get('ebook_access') or item.get('ebook_access', '')
status_text = metadata.get('status') or item.get('status', '')
archive_id = metadata.get('archive_id') or item.get('archive_id')
# Determine if borrowable based on new status vocabulary
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
if is_borrowable:
log(f" ✓ Available for borrowing on Archive.org", flush=True)
log(f" → Queued for auto-borrowing...", flush=True)
# Queue borrow request as special dict object
# We need OCAID (Archive.org ID), not just numeric OLID
ocaid = archive_id
if not ocaid and isbn:
# If no OCAID in metadata, fetch it from OpenLibrary ISBN lookup
try:
import requests
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
r = requests.get(ol_url, timeout=5)
if r.status_code == 200:
ol_data = r.json()
ocaid = ol_data.get('ocaid')
except Exception as e:
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
if ocaid:
urls_to_download.append({
'__borrow_request__': True,
'book_id': ocaid,
'isbn': isbn,
'title': title,
'olid': olid
})
else:
# OCAID not found - book claims borrowable but not on Archive.org
# Fall back to LibGen search instead
log(f" ⚠ Book marked borrowable but not found on Archive.org", file=sys.stderr)
if isbn:
try:
from helper.search_provider import get_provider
libgen_provider = get_provider("libgen", config)
if libgen_provider:
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
if libgen_results:
libgen_result = libgen_results[0]
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
if url:
urls_to_download.append(url)
log(f" ✓ Found on LibGen instead", flush=True)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ LibGen provider not available", file=sys.stderr)
except Exception as e:
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
else:
# Book is NOT borrowable - route to LibGen
if isbn:
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
try:
from helper.search_provider import get_provider
libgen_provider = get_provider("libgen", config)
if libgen_provider:
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
if libgen_results:
libgen_result = libgen_results[0]
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
if url:
urls_to_download.append(url)
log(f" ✓ Found on LibGen", flush=True)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ Not found on LibGen", flush=True)
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
else:
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
except Exception as e:
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
else:
log(f" ⚠ ISBN not available", flush=True)
log(f" ▶ Visit: {item.get('target', 'https://openlibrary.org')}", flush=True)
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
elif origin == 'soulseek':
# Handle Soulseek downloads using the provider
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
username = metadata.get('username')
filename = metadata.get('filename')
size = item.get('size_bytes') or 0
if username and filename:
try:
import asyncio
from helper.search_provider import SoulSeekProvider
provider = SoulSeekProvider(config)
log(f"[search-result] Soulseek: '{title}'", flush=True)
log(f" ▶ Downloading from {username}...", flush=True)
if db:
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
# Get temp directory from config
temp_dir = config.get('temp')
if temp_dir:
temp_dir = str(Path(temp_dir).expanduser())
# Call async download_file with asyncio.run()
success = asyncio.run(provider.download_file(
username=username,
filename=filename,
file_size=size,
target_dir=temp_dir
))
if success:
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
if downloaded_file.exists():
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
files_downloaded_directly += 1
if db:
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
if pipeline_context._PIPE_ACTIVE:
# Create proper PipeObject result
result_dict = create_pipe_object_result(
source='soulseek',
identifier=filename,
file_path=str(downloaded_file),
cmdlet_name='download-data',
title=title,
target=str(downloaded_file), # Explicit target for add-file
extra={
"metadata": metadata,
"origin": "soulseek"
}
)
pipeline_context.emit(result_dict)
else:
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
if db:
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
except Exception as e:
log(f" ✗ Download error: {e}", file=sys.stderr)
if db:
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
else:
log(f"[search-result] Soulseek: '{title}'", flush=True)
log(f" ⚠ Missing download info (username/filename)", flush=True)
if db:
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
elif origin == 'libgen':
# LibGen results can use the direct URL
# Also extract mirrors dict for fallback if primary fails
url = item.get('target')
# Extract mirrors and book_id from full_metadata
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title, # Carried along so the download loop can name the file
}
urls_to_download.append(url_entry)
log(f"[search-result] LibGen: '{title}'", flush=True)
log(f" ✓ Queued for download", flush=True)
if mirrors:
log(f" Mirrors available: {len(mirrors)}", flush=True)
elif origin == 'debrid':
# Debrid results can use download-data
url = item.get('target')
if url:
urls_to_download.append(str(url))
log(f"[search-result] Debrid: '{title}'", flush=True)
log(f" ✓ Queued for download", flush=True)
else:
# Regular fields for non-search results
url = item.get('url') or item.get('link') or item.get('href') or item.get('target')
else:
# Object attributes
origin = getattr(item, 'origin', None)
title = getattr(item, 'title', 'Item')
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
# Handle search provider results
if origin == 'openlibrary':
# OpenLibrary: First check if lendable/downloadable via Archive.org
# Only route to LibGen if NOT available on Archive.org
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
isbn = metadata.get('isbn') or getattr(item, 'isbn', None)
olid = metadata.get('olid') or getattr(item, 'olid', None)
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
if isbn:
log(f" ISBN: {isbn}", flush=True)
# Check if book is borrowable from ebook_access field or status
ebook_access = metadata.get('ebook_access') or getattr(item, 'ebook_access', '')
status_text = metadata.get('status') or getattr(item, 'status', '')
archive_id = metadata.get('archive_id') or getattr(item, 'archive_id', '')
# Determine if borrowable using unified helper
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
if is_borrowable:
# Book IS borrowable on Archive.org
log(f" ✓ Available for borrowing on Archive.org", flush=True)
log(f" → Queued for auto-borrowing...", flush=True)
# Queue borrow request as special dict object
ocaid = archive_id
if not ocaid and isbn:
try:
import requests
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
r = requests.get(ol_url, timeout=5)
if r.status_code == 200:
ol_data = r.json()
ocaid = ol_data.get('ocaid')
except Exception as e:
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
if ocaid:
urls_to_download.append({
'__borrow_request__': True,
'book_id': ocaid,
'isbn': isbn,
'title': title,
'olid': olid or getattr(item, 'openlibrary_id', '')
})
else:
# OCAID not found - book claims borrowable but not on Archive.org
# Fall back to LibGen search instead
log(f" ⚠ No Archive.org ID found - attempting LibGen instead...", file=sys.stderr)
if isbn:
try:
from helper.search_provider import get_provider
libgen_provider = get_provider("libgen", config)
if libgen_provider:
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
if libgen_results:
libgen_result = libgen_results[0]
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
if url:
urls_to_download.append(url)
log(f" ✓ Found on LibGen instead", flush=True)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ LibGen provider not available", file=sys.stderr)
except Exception as e:
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
else:
log(f" ⚠ ISBN not available for LibGen fallback", file=sys.stderr)
else:
# Book is NOT borrowable - route to LibGen
if isbn:
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
try:
from helper.search_provider import get_provider
libgen_provider = get_provider("libgen", config)
if libgen_provider:
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
if libgen_results:
libgen_result = libgen_results[0]
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
if url:
urls_to_download.append(url)
log(f" ✓ Found on LibGen", flush=True)
else:
log(f" ⚠ Not found on LibGen", file=sys.stderr)
else:
log(f" ⚠ Not found on LibGen", flush=True)
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
else:
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
except Exception as e:
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
else:
log(f" ⚠ ISBN not available", flush=True)
log(f" ▶ Visit: {getattr(item, 'target', 'https://openlibrary.org')}", flush=True)
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
elif origin == 'soulseek':
# Handle Soulseek downloads using the provider
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
username = metadata.get('username')
filename = metadata.get('filename')
size = getattr(item, 'size_bytes', 0) or 0
if username and filename:
try:
import asyncio
from helper.search_provider import SoulSeekProvider
provider = SoulSeekProvider(config)
log(f"[search-result] Soulseek: '{title}'", flush=True)
log(f" ▶ Downloading from {username}...", flush=True)
if db:
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
# Get temp directory from config
temp_dir = config.get('temp')
if temp_dir:
temp_dir = str(Path(temp_dir).expanduser())
# Call async download_file with asyncio.run()
success = asyncio.run(provider.download_file(
username=username,
filename=filename,
file_size=size,
target_dir=temp_dir
))
if success:
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
if downloaded_file.exists():
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
files_downloaded_directly += 1
if db:
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
if pipeline_context._PIPE_ACTIVE:
# Create proper PipeObject result
result_dict = create_pipe_object_result(
source='soulseek',
identifier=filename,
file_path=str(downloaded_file),
cmdlet_name='download-data',
title=title,
target=str(downloaded_file), # Explicit target for add-file
extra={
"metadata": metadata,
"origin": "soulseek"
}
)
pipeline_context.emit(result_dict)
else:
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
if db:
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
except Exception as e:
log(f" ✗ Download error: {e}", file=sys.stderr)
if db:
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
else:
log(f"[search-result] Soulseek: '{title}'", flush=True)
log(f" ⚠ Missing download info (username/filename)", flush=True)
if db:
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
elif origin == 'libgen':
# LibGen results with mirrors dict for fallback
url = getattr(item, 'target', None)
# Extract mirrors and book_id from full_metadata
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title, # Carried along so the download loop can name the file
}
urls_to_download.append(url_entry)
else:
log(" ⚠ LibGen result missing download URL", file=sys.stderr)
elif origin == 'debrid':
url = getattr(item, 'target', None)
else:
url = getattr(item, 'url', None) or getattr(item, 'link', None) or getattr(item, 'href', None) or getattr(item, 'target', None)
if url:
urls_to_download.append(str(url))
if not urls_to_download and files_downloaded_directly == 0:
log(f"No downloadable URLs found", file=sys.stderr)
return 1
log(f"Processing {len(urls_to_download)} URL(s)", flush=True)
for i, u in enumerate(urls_to_download, 1):
if isinstance(u, dict) and u.get('format_id'):
log(f" [{i}] Format: {u.get('format_id')} from {str(u.get('source_url', '?'))[:60]}...", flush=True)
elif isinstance(u, dict):
label = u.get('url') or u.get('__playlist_url') or u.get('title') or 'queued request'
log(f" [{i}] {str(label)[:60]}...", flush=True)
else:
log(f" [{i}] URL: {str(u)[:60]}...", flush=True)
# ========================================================================
# RESOLVE OUTPUT DIRECTORY
# ========================================================================
final_output_dir = None
# Priority 1: --storage flag
if storage_location:
try:
final_output_dir = SharedArgs.resolve_storage(storage_location)
log(f"Using storage location: {storage_location}{final_output_dir}", flush=True)
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Priority 2: Config resolver
if final_output_dir is None and resolve_output_dir is not None:
try:
final_output_dir = resolve_output_dir(config)
log(f"Using config resolver: {final_output_dir}", flush=True)
except Exception:
pass
# Priority 3: Config outfile
if final_output_dir is None and config and config.get("outfile"):
try:
final_output_dir = Path(config["outfile"]).expanduser()
log(f"Using config outfile: {final_output_dir}", flush=True)
except Exception:
pass
# Priority 4: Default (home/Videos)
if final_output_dir is None:
final_output_dir = Path.home() / "Videos"
log(f"Using default directory: {final_output_dir}", flush=True)
# Ensure directory exists
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
return 1
# ========================================================================
# DOWNLOAD EACH URL
# ========================================================================
downloaded_files = []
playlists_displayed = 0
formats_displayed = False # Track whether a format listing was shown
exit_code = 0
for url in urls_to_download:
try:
selected_playlist_entries: list[Dict[str, Any]] = []
playlist_existing_paths: set[str] = set()
# ====== HANDLE FORMAT SELECTION FROM PIPED RESULT ======
# If url is a dict with format_id and source_url, extract them and override format_selector
current_format_selector = format_selector
actual_url = url
if isinstance(url, dict) and url.get('format_id') and url.get('source_url'):
log(f"🎬 Format selected: {url.get('format_id')}", flush=True)
format_id = url.get('format_id')
current_format_selector = format_id
# If it's a video-only format (has vcodec but no acodec), add bestaudio
vcodec = url.get('vcodec', '')
acodec = url.get('acodec', '')
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
# Video-only format, add bestaudio automatically
current_format_selector = f"{format_id}+bestaudio"
log(f" Video-only format detected, automatically adding bestaudio", flush=True)
actual_url = url.get('source_url')
url = actual_url # Use the actual URL for further processing
# ====== AUTO-BORROW MODE - INTERCEPT SPECIAL BORROW REQUEST DICTS ======
if isinstance(url, dict) and url.get('__borrow_request__'):
try:
from helper.archive_client import credential_openlibrary, loan, get_book_infos, download
import tempfile
import shutil
book_id = url.get('book_id')
if not book_id:
log(f" ✗ Missing book ID for borrowing", file=sys.stderr)
exit_code = 1
continue
title_val = url.get('title', 'Unknown Book')
book_id_str = str(book_id)
log(f"[auto-borrow] Starting borrow for: {title_val}", flush=True)
log(f" Book ID: {book_id_str}", flush=True)
# Get Archive.org credentials
email, password = credential_openlibrary(config)
if not email or not password:
log(f" ✗ Archive.org credentials not configured", file=sys.stderr)
log(f" ▶ Set ARCHIVE_EMAIL and ARCHIVE_PASSWORD environment variables", file=sys.stderr)
exit_code = 1
continue
# Attempt to borrow and download
try:
log(f" → Logging into Archive.org...", flush=True)
from helper.archive_client import login
import requests
try:
session = login(email, password)
except requests.exceptions.Timeout:
log(f" ✗ Timeout logging into Archive.org (server not responding)", file=sys.stderr)
exit_code = 1
continue
except requests.exceptions.RequestException as e:
log(f" ✗ Error connecting to Archive.org: {e}", file=sys.stderr)
exit_code = 1
continue
log(f" → Borrowing book...", flush=True)
try:
session = loan(session, book_id_str, verbose=True)
except requests.exceptions.Timeout:
log(f" ✗ Timeout while borrowing (server not responding)", file=sys.stderr)
exit_code = 1
continue
except requests.exceptions.RequestException as e:
log(f" ✗ Error while borrowing: {e}", file=sys.stderr)
exit_code = 1
continue
log(f" → Extracting page information...", flush=True)
# Try both URL formats
book_urls = [
f"https://archive.org/borrow/{book_id_str}",
f"https://archive.org/details/{book_id_str}"
]
title = None
links = None
metadata = None
last_error = None
for book_url in book_urls:
try:
title, links, metadata = get_book_infos(session, book_url)
if title and links:
log(f" → Found {len(links)} pages", flush=True)
break
except requests.exceptions.Timeout:
last_error = "Timeout while extracting pages"
log(f" ⚠ Timeout while extracting from {book_url}", flush=True)
continue
except Exception as e:
last_error = str(e)
log(f" ⚠ Failed to extract from {book_url}: {e}", flush=True)
continue
if not links:
log(f" ✗ Could not extract book pages (Last error: {last_error})", file=sys.stderr)
exit_code = 1
continue
# Download pages
log(f" → Downloading {len(links)} pages...", flush=True)
with tempfile.TemporaryDirectory() as temp_dir:
# download(session, n_threads, directory, links, scale, book_id)
images = download(
session,
n_threads=4,
directory=temp_dir,
links=links,
scale=2,
book_id=str(book_id)
)
if not images:
log(f" ✗ No pages downloaded", file=sys.stderr)
exit_code = 1
continue
log(f" ✓ Downloaded {len(images)} pages", flush=True)
# Try to merge into PDF
try:
import img2pdf
log(f" → Merging pages into PDF...", flush=True)
filename = title if title else f"book_{book_id_str}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
output_path = Path(final_output_dir) / f"{filename}.pdf"
# Make unique filename if needed
i = 1
while output_path.exists():
output_path = Path(final_output_dir) / f"{filename}({i}).pdf"
i += 1
pdf_content = img2pdf.convert(images)
if pdf_content:
with open(output_path, 'wb') as f:
f.write(pdf_content)
log(f" ✓ Successfully borrowed and saved to: {output_path}", flush=True)
downloaded_files.append(str(output_path))
# Emit result for downstream cmdlets
file_hash = _compute_file_hash(output_path)
# Build tags including ISBN if available
emit_tags = ['book', 'borrowed', 'pdf']
isbn_tag = url.get('isbn')
if isbn_tag:
emit_tags.append(f'isbn:{isbn_tag}')
olid_tag = url.get('olid')
if olid_tag:
emit_tags.append(f'olid:{olid_tag}')
# Fetch OpenLibrary metadata tags
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
emit_tags.extend(ol_tags)
pipe_obj = create_pipe_object_result(
source='archive.org',
identifier=book_id_str,
file_path=str(output_path),
cmdlet_name='download-data',
title=title_val,
file_hash=file_hash,
tags=emit_tags,
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
)
pipeline_context.emit(pipe_obj)
exit_code = 0
except ImportError:
log(f" ⚠ img2pdf not available - saving pages as collection", file=sys.stderr)
# Just copy images to output dir
filename = title if title else f"book_{book_id_str}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
output_dir = Path(final_output_dir) / filename
i = 1
while output_dir.exists():
output_dir = Path(final_output_dir) / f"{filename}({i})"
i += 1
shutil.copytree(temp_dir, str(output_dir))
log(f" ✓ Successfully borrowed and saved to: {output_dir}", flush=True)
downloaded_files.append(str(output_dir))
# Emit result for downstream cmdlets
# Build tags including ISBN if available
emit_tags = ['book', 'borrowed', 'pages']
isbn_tag = url.get('isbn')
if isbn_tag:
emit_tags.append(f'isbn:{isbn_tag}')
olid_tag = url.get('olid')
if olid_tag:
emit_tags.append(f'olid:{olid_tag}')
# Fetch OpenLibrary metadata tags
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
emit_tags.extend(ol_tags)
pipe_obj = create_pipe_object_result(
source='archive.org',
identifier=book_id_str,
file_path=str(output_dir),
cmdlet_name='download-data',
title=title_val,
tags=emit_tags,
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
)
pipeline_context.emit(pipe_obj)
exit_code = 0
except Exception as e:
log(f" ✗ Borrow/download failed: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
exit_code = 1
continue # Skip normal URL handling
except ImportError as e:
log(f" ✗ Archive.org tools not available: {e}", file=sys.stderr)
exit_code = 1
continue
except Exception as e:
log(f" ✗ Auto-borrow error: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
exit_code = 1
continue
# ====== LIBGEN MIRROR FALLBACK MODE ======
# Handle libgen results with mirrors dict for fallback on failure
if isinstance(url, dict) and 'mirrors' in url:
try:
primary_url = url.get('url')
mirrors_dict = url.get('mirrors', {})
book_id = url.get('book_id', '')
if not primary_url:
log(f"Skipping libgen entry: no primary URL", file=sys.stderr)
exit_code = 1
continue
# Build list of mirrors to try: primary first, then alternatives
mirrors_to_try = [primary_url]
mirrors_to_try.extend(mirrors_dict.values())
# Remove duplicates while preserving order
mirrors_to_try = list(dict.fromkeys(mirrors_to_try))
log(f"🔄 LibGen download with mirror fallback (book_id: {book_id})", flush=True)
log(f" Primary: {primary_url[:80]}...", flush=True)
if len(mirrors_to_try) > 1:
log(f" {len(mirrors_to_try) - 1} alternative mirror(s) available", flush=True)
# Resolve cookies path
final_cookies_path_libgen = None
if cookies_path:
if resolve_cookies_path:
try:
final_cookies_path_libgen = resolve_cookies_path(config, Path(cookies_path))
except Exception:
final_cookies_path_libgen = Path(cookies_path).expanduser() if cookies_path else None
else:
final_cookies_path_libgen = Path(cookies_path).expanduser()
download_succeeded = False
last_error = None
successful_mirror = None
# Try each mirror in sequence using libgen_service's native download
for mirror_idx, mirror_url in enumerate(mirrors_to_try, 1):
try:
if mirror_idx > 1:
log(f" → Trying mirror #{mirror_idx}: {mirror_url[:80]}...", flush=True)
# Use libgen_service's download_from_mirror for proper libgen handling
from helper.libgen_service import download_from_mirror
# Generate filename from book_id and title
safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100]
file_path = final_output_dir / f"{safe_title}_{book_id}.pdf"
# Attempt download using libgen's native function
success = download_from_mirror(
mirror_url=mirror_url,
output_path=file_path,
log_info=lambda msg: log(f" {msg}", flush=True),
log_error=lambda msg: log(f"{msg}", file=sys.stderr)
)
if success and file_path.exists():
log(f" ✓ Downloaded successfully from mirror #{mirror_idx}", flush=True)
successful_mirror = mirror_url
download_succeeded = True
# Emit result for downstream cmdlets
file_hash = _compute_file_hash(file_path)
emit_tags = ['libgen', 'book']
pipe_obj = create_pipe_object_result(
source='libgen',
identifier=book_id,
file_path=str(file_path),
cmdlet_name='download-data',
file_hash=file_hash,
tags=emit_tags,
source_url=successful_mirror
)
pipeline_context.emit(pipe_obj)
downloaded_files.append(str(file_path))
exit_code = 0
break # Success, stop trying mirrors
except Exception as e:
last_error = str(e)
if mirror_idx == 1:
log(f" ⚠ Primary mirror failed: {e}", flush=True)
else:
log(f" ⚠ Mirror #{mirror_idx} failed: {e}", flush=True)
if not download_succeeded:
log(f" ✗ All mirrors failed. Last error: {last_error}", file=sys.stderr)
if "getaddrinfo failed" in str(last_error) or "NameResolutionError" in str(last_error) or "Failed to resolve" in str(last_error):
log(f" ⚠ Network issue detected: Cannot resolve LibGen mirror hostnames", file=sys.stderr)
log(f" ▶ Check your network connection or try with a VPN/proxy", file=sys.stderr)
exit_code = 1
continue # Skip to next URL
except Exception as e:
log(f" ✗ LibGen mirror fallback error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
continue
# Ensure URL is a string for normal handling
if not isinstance(url, str):
# Check if it's a playlist item marker
if isinstance(url, dict) and url.get('__playlist_url'):
playlist_url = url.get('__playlist_url')
item_num = url.get('__playlist_item', 1)
log(f"📍 Handling selected playlist item #{item_num}", flush=True)
# Convert to actual URL and set playlist_items to download only this item
url = playlist_url
playlist_items = str(item_num)
# Fall through to normal handling below
else:
log(f"Skipping invalid URL entry: {url}", file=sys.stderr)
continue
log(f"Probing URL: {url}", flush=True)
# ====== TORRENT MODE - INTERCEPT BEFORE NORMAL DOWNLOAD ======
if torrent_mode or url.lower().startswith('magnet:'):
log(f"🧲 Torrent/magnet mode - spawning background worker...", flush=True)
try:
# Get API key from config
from config import get_debrid_api_key
api_key = get_debrid_api_key(config)
if not api_key:
log(f"✗ AllDebrid API key not found in config", file=sys.stderr)
exit_code = 1
continue
# Create a unique worker ID
worker_id = f"torrent_{uuid.uuid4().hex[:8]}"
# Get worker manager if available from config
worker_manager = config.get('_worker_manager')
# Create worker in manager if available
if worker_manager:
try:
worker_manager.track_worker(
worker_id,
worker_type="download_torrent",
title=f"Download: {url[:60]}...",
description=f"Torrent/magnet download via AllDebrid",
pipe=pipeline_context.get_current_command_text()
)
log(f"✓ Worker created (ID: {worker_id})", flush=True)
except Exception as e:
log(f"⚠ Failed to create worker: {e}", file=sys.stderr)
worker_manager = None
# Spawn background thread to handle the download
worker_thread = threading.Thread(
target=_download_torrent_worker,
args=(
worker_id,
url,
final_output_dir,
config,
api_key,
playlist_items,
audio_mode,
wait_timeout,
worker_manager,
),
daemon=False,
name=f"TorrentWorker_{worker_id}"
)
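# daemon=False: the interpreter waits for this thread at exit, so the AllDebrid download can finish even after the cmdlet returns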
worker_thread.start()
log(f"✓ Background worker started (ID: {worker_id})", flush=True)
# Emit worker info so user can track it
worker_info = {
'worker_id': worker_id,
'worker_type': 'download_torrent',
'source_url': url,
'status': 'running',
'message': 'Downloading in background...'
}
pipeline_context.emit(worker_info)
continue
except ImportError:
log(f"✗ AllDebrid client not available", file=sys.stderr)
exit_code = 1
continue
except Exception as e:
# Catches AllDebridError and other exceptions
log(f"✗ Failed to spawn torrent worker: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
continue # Skip to next URL
# ====== NORMAL DOWNLOAD MODE (HTTP/HTTPS) ======
# First, probe the URL to detect playlists and get info
# For YouTube URLs, ignore playlists and only probe the single video
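# probe_url returns metadata only (title, extractor, and an "entries" list for playlists) without downloading; the entries drive the playlist handling below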
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
probe_info = probe_url(url, no_playlist=is_youtube_url)
is_actual_playlist = False # Track if we have a real multi-item playlist
if probe_info:
log(f"✓ Probed: {probe_info.get('title', url)} ({probe_info.get('extractor', 'unknown')})")
# If it's a playlist, show the result table and skip download for now
entries = probe_info.get("entries", [])
if entries and not playlist_items:
is_actual_playlist = True # We have a real playlist with multiple items
# Playlist detected but NO selection provided
# Always show table for user to select items
log(f"📋 Found playlist with {len(entries)} items")
_show_playlist_table(url, probe_info)
log(f" Playlist displayed. To select items, use @* or @1,3,5-8 syntax after piping results")
playlists_displayed += 1
continue # Skip to next URL - don't download playlist without selection
elif entries and playlist_items:
is_actual_playlist = True # We have a real playlist with item selection
# Playlist detected WITH selection - will download below
# Expand wildcard if present
expanded_items = _expand_playlist_selection(playlist_items, len(entries))
playlist_items = expanded_items
selected_playlist_entries = _select_playlist_entries(entries, playlist_items)
log(f"📋 Found playlist with {len(entries)} items - downloading selected: {playlist_items}")
else:
log(f"Single item: {probe_info.get('title', 'Unknown')}")
# ====== FORMAT LISTING MODE ======
if list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://')):
log(f"Fetching formats for: {url}", flush=True)
from helper.download import list_formats
from result_table import ResultTable
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
if all_formats:
# Filter and sort formats for better user experience
formats = _filter_and_sort_formats(all_formats)
# Create result table for format display
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
for fmt in formats:
row = table.add_row()
row.add_column("Format ID", fmt.get("format_id", ""))
# Build resolution/bitrate string
vcodec = fmt.get("vcodec", "")
acodec = fmt.get("acodec", "")
height = fmt.get("height")
tbr = fmt.get("tbr")
if vcodec != "none" and acodec != "none":
# Video + audio
res_str = fmt.get("resolution", "")
elif acodec != "none" and vcodec == "none":
# Audio only - show bitrate
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
else:
# Video only
res_str = fmt.get("resolution", "")
row.add_column("Resolution", res_str)
# Build codec string (merged vcodec/acodec)
codec_parts = []
if vcodec and vcodec != "none":
codec_parts.append(f"v:{vcodec}")
if acodec and acodec != "none":
codec_parts.append(f"a:{acodec}")
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
row.add_column("Codec", codec_str)
if fmt.get("filesize"):
size_mb = fmt["filesize"] / (1024 * 1024)
row.add_column("Size", f"{size_mb:.1f} MB")
# Set source command for @N expansion
table.set_source_command("download-data", [url])
# Note: Row selection args are not set - users select with @N syntax directly
# Display table and emit as pipeline result
log(str(table), flush=True)
formats_displayed = True
# Store table for @N expansion so CLI can reconstruct commands
# Uses separate current_stage_table instead of result history table
pipeline_context.set_current_stage_table(table)
# Always emit formats so they can be selected with @N
for i, fmt in enumerate(formats, 1):
pipeline_context.emit({
"format_id": fmt.get("format_id", ""),
"format_string": fmt.get("format", ""),
"resolution": fmt.get("resolution", ""),
"vcodec": fmt.get("vcodec", ""),
"acodec": fmt.get("acodec", ""),
"ext": fmt.get("ext", ""),
"filesize": fmt.get("filesize"),
"source_url": url,
"index": i,
})
log(f"Use @N syntax to select a format and download", flush=True)
else:
log(f"✗ No formats available for this URL", file=sys.stderr)
continue # Skip download, just show formats
# ====== AUTO-DETECT MULTIPLE FORMATS ======
# Check if multiple formats exist and handle based on -item flag
if (not current_format_selector and not list_formats_mode and
isinstance(url, str) and url.startswith(('http://', 'https://'))):
# Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.)
from helper.download import is_url_supported_by_ytdlp, list_formats
from result_table import ResultTable
if is_url_supported_by_ytdlp(url):
log(f"Checking available formats for: {url}", flush=True)
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
if all_formats:
# Filter and sort formats for better user experience
formats = _filter_and_sort_formats(all_formats)
# Handle -item selection for formats (single video)
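# A bare numeric -item on a single video is treated as a 1-based index into the filtered format list,
# then cleared so it is not passed to yt-dlp as a playlist selection.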
if playlist_items and playlist_items.isdigit() and not is_actual_playlist:
idx = int(playlist_items)
if 0 < idx <= len(formats):
fmt = formats[idx-1]
current_format_selector = fmt.get("format_id")
log(f"Selected format #{idx}: {current_format_selector}")
playlist_items = None # Clear so it doesn't affect download options
else:
log(f"Invalid format index: {idx}", file=sys.stderr)
elif len(formats) > 1:
# Multiple formats available
log(f"📊 Found {len(formats)} available formats for: {probe_info.get('title', 'Unknown')}", flush=True)
# Always show table for format selection via @N syntax
# Show table and wait for @N selection
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
for fmt in formats:
row = table.add_row()
row.add_column("Format ID", fmt.get("format_id", ""))
# Build resolution/bitrate string
vcodec = fmt.get("vcodec", "")
acodec = fmt.get("acodec", "")
height = fmt.get("height")
tbr = fmt.get("tbr")
if vcodec != "none" and acodec != "none":
# Video + audio
res_str = fmt.get("resolution", "")
elif acodec != "none" and vcodec == "none":
# Audio only - show bitrate
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
else:
# Video only
res_str = fmt.get("resolution", "")
row.add_column("Resolution", res_str)
# Build codec string (merged vcodec/acodec)
codec_parts = []
if vcodec and vcodec != "none":
codec_parts.append(f"v:{vcodec}")
if acodec and acodec != "none":
codec_parts.append(f"a:{acodec}")
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
row.add_column("Codec", codec_str)
if fmt.get("filesize"):
size_mb = fmt["filesize"] / (1024 * 1024)
row.add_column("Size", f"{size_mb:.1f} MB")
# Set source command for @N expansion
table.set_source_command("download-data", [url])
# Set row selection args so @N expands to "download-data URL -item N"
for i in range(len(formats)):
# i is 0-based index, but -item expects 1-based index
table.set_row_selection_args(i, ["-item", str(i + 1)])
# Display table and emit formats so they can be selected with @N
log(str(table), flush=True)
log(f"💡 Use @N syntax to select a format and download (e.g., @1)", flush=True)
# Store table for @N expansion so CLI can reconstruct commands
pipeline_context.set_current_stage_table(table)
# Emit formats as pipeline results for @N selection
for i, fmt in enumerate(formats, 1):
pipeline_context.emit({
"format_id": fmt.get("format_id", ""),
"format_string": fmt.get("format", ""),
"resolution": fmt.get("resolution", ""),
"vcodec": fmt.get("vcodec", ""),
"acodec": fmt.get("acodec", ""),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
"source_url": url,
"index": i,
})
formats_displayed = True # Mark that we displayed formats
continue # Skip download, user must select format via @N
log(f"Downloading: {url}", flush=True)
# Resolve cookies path if specified
final_cookies_path = None
if cookies_path:
if resolve_cookies_path:
try:
final_cookies_path = resolve_cookies_path(config, Path(cookies_path))
except Exception:
final_cookies_path = Path(cookies_path).expanduser() if cookies_path else None
else:
final_cookies_path = Path(cookies_path).expanduser()
# Create download options - use correct parameter names
# Mode is "audio" or "video", required field
mode = "audio" if audio_mode else "video"
# Detect YouTube URLs and set no_playlist to download only the single video
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
download_opts = DownloadOptions(
url=url,
mode=mode,
output_dir=final_output_dir,
cookies_path=final_cookies_path,
ytdl_format=current_format_selector, # Use per-URL format override if available
clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None,
playlist_items=playlist_items,
no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video
)
# For playlist downloads, capture existing files BEFORE download
if playlist_items and selected_playlist_entries:
_, playlist_existing_paths = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
# Call download_media from helper - no show_progress param
result_data = download_media(download_opts)
if result_data and result_data.path:
file_path = result_data.path
if file_path.exists():
# Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist)
if is_actual_playlist:
if not selected_playlist_entries:
log(
"⚠ Playlist metadata unavailable; cannot emit selected items for this stage.",
file=sys.stderr,
)
exit_code = 1
continue
matched_after, _ = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
if not matched_after:
log(
"⚠ No playlist files found for the selected items after download.",
file=sys.stderr,
)
exit_code = 1
continue
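# Diff against the pre-download snapshot: only paths that did not exist before this run count as new;
# if nothing new appeared (everything was already cached), fall back to emitting the matched files as-is.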
new_playlist_files: list[Path] = []
for playlist_file in matched_after:
try:
path_key = str(playlist_file.resolve())
except OSError:
path_key = str(playlist_file)
if path_key not in playlist_existing_paths:
new_playlist_files.append(playlist_file)
emit_targets = new_playlist_files if new_playlist_files else matched_after
if new_playlist_files:
log(f"📋 Playlist download completed: {len(new_playlist_files)} new file(s)")
else:
log(f"📁 Reusing {len(emit_targets)} cached playlist file(s)", flush=True)
for playlist_file in emit_targets:
file_hash = _compute_file_hash(playlist_file)
tags = []
if extract_ytdlp_tags and result_data.tags:
tags = result_data.tags
pipe_obj = create_pipe_object_result(
source='download',
identifier=playlist_file.stem,
file_path=str(playlist_file),
cmdlet_name='download-data',
title=playlist_file.name,
file_hash=file_hash,
is_temp=False,
extra={
'url': url,
'tags': tags,
'audio_mode': audio_mode,
'format': format_selector,
'from_playlist': True,
},
)
downloaded_files.append(playlist_file)
pipeline_context.emit(pipe_obj)
else:
# Single file download
file_hash = result_data.hash_value or _compute_file_hash(file_path)
tags = result_data.tags if result_data.tags else []
pipe_obj = create_pipe_object_result(
source='download',
identifier=file_path.stem,
file_path=str(file_path),
cmdlet_name='download-data',
title=file_path.name,
file_hash=file_hash,
is_temp=False,
extra={
'url': url,
'tags': tags,
'audio_mode': audio_mode,
'format': format_selector,
'clipped': clip_range is not None,
}
)
downloaded_files.append(file_path)
pipeline_context.emit(pipe_obj)
log(f"✓ Downloaded: {file_path}", flush=True)
else:
log(f"Download returned no result for {url}", file=sys.stderr)
exit_code = 1
except Exception as e:
log(f"Error downloading {url}: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
# Success if we downloaded files or displayed playlists/formats
if downloaded_files or files_downloaded_directly > 0:
total_files = len(downloaded_files) + files_downloaded_directly
log(f"✓ Successfully downloaded {total_files} file(s)", flush=True)
# Create a result table for the downloaded files
# This ensures that subsequent @N commands select from these files
# instead of trying to expand the previous command (e.g. search-file)
if downloaded_files:
from result_table import ResultTable
table = ResultTable("Downloaded Files")
for i, file_path in enumerate(downloaded_files):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("File", file_path.name)
row.add_column("Path", str(file_path))
try:
size_mb = file_path.stat().st_size / (1024*1024)
row.add_column("Size", f"{size_mb:.1f} MB")
except OSError:
row.add_column("Size", "?")
# Set selection args to just the file path (or index if we want item selection)
# For item selection fallback, we don't strictly need row args if source command is None
# But setting them helps if we want to support command expansion later
table.set_row_selection_args(i, [str(file_path)])
# Register the table but DO NOT set a source command
# This forces CLI to use item-based selection (filtering the pipe)
# instead of command expansion
pipeline_context.set_last_result_table_overlay(table, downloaded_files)
pipeline_context.set_current_stage_table(table)
# Also print the table so user sees what they got
log(str(table), flush=True)
if db:
db.update_worker_status(worker_id, 'completed')
db.close()
return 0
if playlists_displayed:
log(f"✓ Displayed {playlists_displayed} playlist(s) for selection", flush=True)
if db:
db.update_worker_status(worker_id, 'completed')
db.close()
return 0 # Success - playlists shown
if formats_displayed:
log(f"✓ Format selection table displayed - use @N to select and download", flush=True)
if db:
db.update_worker_status(worker_id, 'completed')
db.close()
return 0 # Success - formats shown
log(f"No files were downloaded or playlists displayed", file=sys.stderr)
if db:
db.update_worker_status(worker_id, 'completed')
db.close()
return 1
CMDLET = Cmdlet(
name="download-data",
exec=_run,
summary="Download data from URLs with playlist/clip support using yt-dlp",
usage="download-data <url> [options] or search-file | download-data [options]",
aliases=["download", "dl"],
args=[
CmdletArg(
name="url",
type="string",
required=False,
description="URL to download (HTTP/HTTPS or file with URL list)",
variadic=True
),
CmdletArg(
name="-url",
type="string",
description="URL to download (alias for positional argument)",
variadic=True
),
CmdletArg(
name="list-formats",
type="flag",
description="List available formats without downloading"
),
CmdletArg(
name="audio",
type="flag",
alias="a",
description="Download audio only (extract from video)"
),
CmdletArg(
name="video",
type="flag",
alias="v",
description="Download video (default if not specified)"
),
CmdletArg(
name="format",
type="string",
alias="fmt",
description="Explicit yt-dlp format selector (e.g., 'bestvideo+bestaudio')"
),
CmdletArg(
name="clip",
type="string",
description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"
),
CmdletArg(
name="cookies",
type="string",
description="Path to cookies.txt file for authentication"
),
CmdletArg(
name="torrent",
type="flag",
description="Download torrent/magnet via AllDebrid (requires API key in config)"
),
CmdletArg(
name="wait",
type="float",
description="Wait time (seconds) for magnet processing timeout"
),
CmdletArg(
name="item",
type="string",
alias="items",
description="Item selection for playlists/formats: use '-item N' to select format N, or '-item' to show table for @N selection in next command"
),
SharedArgs.STORAGE, # Storage location: local, hydrus, 0x0, debrid, ftp
],
details=[
"Download media from URLs with advanced features.",
"",
"BASIC USAGE:",
" download-data https://youtube.com/watch?v=xyz",
" download-data https://example.com/file.pdf -storage local",
"",
"AUDIO/VIDEO OPTIONS:",
" -audio, -a Extract audio from video (M4A, MP3)",
" -video, -v Download as video (default)",
"",
"FORMAT SELECTION:",
" -format SELECTOR Specify yt-dlp format",
" Examples: 'best', 'bestvideo+bestaudio', '22'",
"",
"FORMAT/RESULT ITEM SELECTION:",
" -item Show available formats in table (see @N below)",
" -item N Auto-select and download format #N (e.g., -item 1)",
" Example: download-data URL -item 2 | add-file -storage local",
"",
"FORMAT SELECTION WITH @N SYNTAX:",
" 1. Show formats: download-data URL",
" 2. Select with @N: @1 | download-data | add-file",
" OR use -item N to skip manual selection",
"",
"CLIPPING:",
" -clip START-END Extract time range from media",
" Format: MM:SS-MM:SS (e.g., 34:03-35:08)",
" Also accepts: 2043-2108 (seconds)",
"",
"PLAYLIST MODE:",
" Automatically detects playlists",
" Shows numbered list of tracks",
" Download specific items: -item '1,3,5-8'",
" Download all items: -item '*'",
"",
"TORRENT MODE:",
" Download torrents/magnets via AllDebrid (if configured)",
" Usage: download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8'",
" -wait SECONDS Maximum wait time for magnet processing (default: 1800)",
"",
"STORAGE LOCATIONS:",
" -storage local ~/Videos (default)",
" -storage hydrus ~/.hydrus/client_files",
" -storage 0x0 ~/Screenshots",
" -storage debrid ~/Debrid",
" -storage ftp ~/FTP",
"",
"EXAMPLES:",
" # Download YouTube video as audio",
" download-data https://youtube.com/watch?v=xyz -audio -storage local",
"",
" # Extract specific clip from video",
" download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best",
"",
" # Download specific tracks from playlist",
" download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'",
"",
" # Download all items from playlist",
" download-data https://youtube.com/playlist?list=xyz -item '*'",
"",
" # Download with authentication",
" download-data https://example.com/content -cookies ~/cookies.txt",
"",
"TORRENT EXAMPLES:",
" # Download specific tracks from magnet link",
" download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8' -storage local",
"",
" # Download all items from torrent and merge",
" download-data -torrent magnet:?xt=urn:btih:... -item '*' | merge-file | add-file",
"",
" # Download with custom wait time (5 minutes)",
" download-data -torrent magnet:?xt=urn:btih:... -wait 300 -item '1-5'",
]
)