Medios-Macina/cmdlet/download_media.py

"""Download media from url using yt-dlp (streaming sites only).

Focused cmdlet for video/audio downloads from yt-dlp-supported sites:
- YouTube, Twitch, Dailymotion, Vimeo, etc.
- No direct file downloads (use download-file for that)
- Playlist detection with item selection
- Clip extraction (time ranges)
- Format selection and audio/video modes
- Tags extraction and metadata integration
"""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

import glob  # noqa: F401
import hashlib
import json  # noqa: F401
import random
import re
import string
import subprocess
import sys
import time
import traceback
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin, urlparse

import httpx

from SYS.logger import log, debug
from SYS.utils import ensure_directory, sha256_file
from API.HTTP import HTTPClient
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
import pipeline as pipeline_context
from result_table import ResultTable

from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object


# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
#
# yt-dlp is treated as an optional dependency at import time: any exception raised
# while importing it is stored in YTDLP_IMPORT_ERROR and ``yt_dlp`` is bound to
# None, so this module still imports and callers can test for availability.
# Note that ``gen_extractors`` is left unbound when the import fails; code using
# it must check ``yt_dlp is not None`` first.
try:
    import yt_dlp  # type: ignore
    from yt_dlp.extractor import gen_extractors  # type: ignore
except Exception as exc:
    yt_dlp = None  # type: ignore
    YTDLP_IMPORT_ERROR = exc
else:
    YTDLP_IMPORT_ERROR = None

# Tag extraction is optional as well; ``extract_ytdlp_tags`` is None when the
# ``metadata`` module cannot be imported.
try:
    from metadata import extract_ytdlp_tags
except ImportError:
    extract_ytdlp_tags = None

# Lazily filled list of yt-dlp extractor objects (None until first populated).
_EXTRACTOR_CACHE: List[Any] | None = None


def _ensure_yt_dlp_ready() -> None:
    """Raise ``DownloadError`` unless the yt-dlp module was imported.

    The error message carries the recorded import failure when there is one,
    otherwise a generic "not installed" note.
    """
    if yt_dlp is None:
        reason = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
        raise DownloadError(f"yt-dlp module not available: {reason}")


def is_url_supported_by_ytdlp(url: str) -> bool:
    """Report whether a dedicated (non-generic) yt-dlp extractor accepts *url*.

    The extractor list is built on first use and kept in ``_EXTRACTOR_CACHE``;
    if building it fails the cache becomes an empty list. Extractors whose
    ``suitable`` check raises are skipped, and the extractor named "generic"
    never counts as support.

    Returns:
        True when some non-generic extractor reports the URL as suitable,
        False otherwise (including when yt-dlp is not available).
    """
    global _EXTRACTOR_CACHE

    if yt_dlp is None:
        return False

    if _EXTRACTOR_CACHE is None:
        try:
            _EXTRACTOR_CACHE = list(gen_extractors())  # type: ignore[arg-type]
        except Exception:
            _EXTRACTOR_CACHE = []

    for ie in _EXTRACTOR_CACHE:
        try:
            accepts = ie.suitable(url)
        except Exception:
            continue
        # The catch-all "generic" extractor does not count as real support.
        if accepts and getattr(ie, "IE_NAME", "").lower() != "generic":
            return True
    return False


def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
    """Fetch the formats yt-dlp reports for *url* without downloading anything.

    Args:
        url: Page or media URL handed to yt-dlp.
        no_playlist: When true, sets yt-dlp's ``noplaylist`` option.
        playlist_items: Optional value for yt-dlp's ``playlist_items`` option.

    Returns:
        One summary dict per reported format (format_id, format, ext,
        resolution, width, height, fps, vcodec, acodec, filesize, abr, tbr),
        or None when no formats are reported or the lookup raises.

    Raises:
        DownloadError: If yt-dlp is not available (checked before the lookup).
    """
    _ensure_yt_dlp_ready()

    # (key, default) pairs copied from every yt-dlp format entry, in output order.
    exported_fields = (
        ("format_id", ""),
        ("format", ""),
        ("ext", ""),
        ("resolution", ""),
        ("width", None),
        ("height", None),
        ("fps", None),
        ("vcodec", "none"),
        ("acodec", "none"),
        ("filesize", None),
        ("abr", None),
        ("tbr", None),
    )

    try:
        assert yt_dlp is not None
        query_opts: Dict[str, Any] = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
        if no_playlist:
            query_opts["noplaylist"] = True
        if playlist_items:
            query_opts["playlist_items"] = playlist_items

        with yt_dlp.YoutubeDL(query_opts) as ydl:  # type: ignore[arg-type]
            debug(f"Fetching format list for: {url}")
            raw_formats = ydl.extract_info(url, download=False).get("formats", [])
            if not raw_formats:
                log("No formats available", file=sys.stderr)
                return None

            summaries = [
                {key: fmt.get(key, default) for key, default in exported_fields}
                for fmt in raw_formats
            ]
            debug(f"Found {len(summaries)} available formats")
            return summaries
    except Exception as e:
        # Any failure during the lookup is reported and turned into None.
        log(f"✗ Error fetching formats: {e}", file=sys.stderr)
        return None


def _pick_best_audio_format_id(formats: List[Dict[str, Any]]) -> Optional[str]:
    audio_only: List[Dict[str, Any]] = []
    for fmt in formats:
        if not isinstance(fmt, dict):
            continue
        format_id = str(fmt.get("format_id") or "").strip()
        if not format_id:
            continue
        vcodec = str(fmt.get("vcodec") or "none").lower()
        acodec = str(fmt.get("acodec") or "none").lower()
        if vcodec != "none":
            continue
        if not acodec or acodec == "none":
            continue
        audio_only.append(fmt)

    if not audio_only:
        return None

    def score(f: Dict[str, Any]) -> tuple[float, float]:
        tbr = f.get("tbr")
        abr = f.get("abr")
        bitrate = 0.0
        for candidate in (tbr, abr):
            try:
                if candidate is not None:
                    bitrate = max(bitrate, float(candidate))
            except Exception:
                pass
        size = 0.0
        try:
            fs = f.get("filesize")
            if fs is not None:
                size = float(fs)
        except Exception:
            pass
        return (bitrate, size)

    best = max(audio_only, key=score)
    best_id = str(best.get("format_id") or "").strip()
    return best_id or None


def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
    sections_list = ytdl_options.get("download_sections", [])
    if not sections_list:
        return "", {}

    session_id = hashlib.md5((url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12]
    first_section_info = None

    for section_idx, section in enumerate(sections_list, 1):
        base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
        output_dir_path = Path(base_outtmpl).parent
        filename_tmpl = f"{session_id}_{section_idx}"
        if base_outtmpl.endswith(".%(ext)s"):
            filename_tmpl += ".%(ext)s"
        section_outtmpl = str(output_dir_path / filename_tmpl)

        if section_idx == 1:
            metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
            if ytdl_options.get("cookiefile"):
                cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
                metadata_cmd.extend(["--cookies", cookies_path])
            if ytdl_options.get("noplaylist"):
                metadata_cmd.append("--no-playlist")
            metadata_cmd.append(url)
            try:
                meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
                if meta_result.returncode == 0 and meta_result.stdout:
                    try:
                        info_dict = json.loads(meta_result.stdout.strip())
                        first_section_info = info_dict
                        if not quiet:
                            debug(f"Extracted title from metadata: {info_dict.get('title')}")
                    except json.JSONDecodeError:
                        if not quiet:
                            debug("Could not parse JSON metadata")
            except Exception as e:
                if not quiet:
                    debug(f"Error extracting metadata: {e}")

        cmd = ["yt-dlp"]
        if ytdl_options.get("format"):
            cmd.extend(["-f", ytdl_options["format"]])
        if ytdl_options.get("force_keyframes_at_cuts"):
            cmd.extend(["--force-keyframes-at-cuts"]) if ytdl_options.get("force_keyframes_at_cuts") else None
        cmd.extend(["-o", section_outtmpl])
        if ytdl_options.get("cookiefile"):
            cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
            cmd.extend(["--cookies", cookies_path])
        if ytdl_options.get("noplaylist"):
            cmd.append("--no-playlist")

        # Apply clip/section selection
        cmd.extend(["--download-sections", section])

        cmd.append(url)
        if not quiet:
            debug(f"Running yt-dlp for section: {section}")
        try:
            subprocess.run(cmd, check=True)
        except Exception as exc:
            if not quiet:
                debug(f"yt-dlp error for section {section}: {exc}")

    return session_id, first_section_info or {}


def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
    """Translate a ``DownloadOptions`` object into a yt-dlp options dict.

    Creates the output directory, then fills in the output template, retry
    and timeout settings, progress hook (omitted when ``opts.quiet``),
    cookie file, playlist flags, format selection for audio or video mode,
    and clip sections.

    ``opts.clip_sections`` is a comma-separated list of ``start-end`` ranges in
    whole seconds; ranges that do not parse are silently dropped.
    """

    def _hms(total_seconds: int) -> str:
        mins, secs = divmod(total_seconds, 60)
        hrs, mins = divmod(mins, 60)
        return f"{hrs:02d}:{mins:02d}:{secs:02d}"

    ensure_directory(opts.output_dir)

    options: Dict[str, Any] = {
        "outtmpl": str((opts.output_dir / "%(title)s.%(ext)s").resolve()),
        "quiet": True,
        "no_warnings": True,
        "noprogress": True,
        "socket_timeout": 30,
        "retries": 10,
        "fragment_retries": 10,
        "http_chunk_size": 10_485_760,
        "restrictfilenames": True,
        "progress_hooks": [] if opts.quiet else [_progress_callback],
    }

    # A per-download cookie file wins; otherwise fall back to the global one.
    if opts.cookies_path and opts.cookies_path.is_file():
        cookie_file = str(opts.cookies_path)
    else:
        from hydrus_health_check import get_cookies_file_path  # local import
        cookie_file = get_cookies_file_path()
    if cookie_file:
        options["cookiefile"] = cookie_file

    if opts.no_playlist:
        options["noplaylist"] = True

    if opts.mode == "audio":
        options["format"] = opts.ytdl_format or "251/140/bestaudio"
        options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
    else:
        options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
        options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]

    if opts.clip_sections:
        clip_specs = []
        for raw_range in opts.clip_sections.split(','):
            try:
                begin, finish = map(int, raw_range.split('-'))
                clip_specs.append(f"*{_hms(begin)}-{_hms(finish)}")
            except (ValueError, AttributeError):
                continue
        if clip_specs:
            options["download_sections"] = clip_specs
            debug(f"Download sections configured: {', '.join(clip_specs)}")

    if opts.playlist_items:
        options["playlist_items"] = opts.playlist_items

    if not opts.quiet:
        debug(f"yt-dlp: mode={opts.mode}, format={options.get('format')}")
    return options


def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    queue: List[Dict[str, Any]] = [info]
    seen: set[int] = set()
    while queue:
        current = queue.pop(0)
        obj_id = id(current)
        if obj_id in seen:
            continue
        seen.add(obj_id)
        entries = current.get("entries")
        if isinstance(entries, list):
            for entry in entries:
                queue.append(entry)
        if current.get("requested_downloads") or not entries:
            yield current


def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
    requested = entry.get("requested_downloads")
    if isinstance(requested, list):
        for item in requested:
            if isinstance(item, dict):
                fp = item.get("filepath") or item.get("_filename")
                if fp:
                    yield Path(fp)
    for key in ("filepath", "_filename", "filename"):
        value = entry.get(key)
        if value:
            yield Path(value)
    if entry.get("filename"):
        yield output_dir / entry["filename"]


def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
    """Find the first entry of *info* whose candidate path exists on disk.

    Each candidate is tried as given; a relative candidate is also tried
    relative to *output_dir*.

    Returns:
        The matching ``(entry, path)`` pair.

    Raises:
        FileNotFoundError: If no candidate of any entry is an existing file.
    """
    for entry in _iter_download_entries(info):
        for path in _candidate_paths(entry, output_dir):
            if path.is_file():
                return entry, path
            if path.is_absolute():
                continue
            relocated = output_dir / path
            if relocated.is_file():
                return entry, relocated
    raise FileNotFoundError("yt-dlp did not report a downloaded media file")


def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
    for payload in [info] + info.get("entries", []):
        if not isinstance(payload, dict):
            continue
        hashes = payload.get("hashes")
        if isinstance(hashes, dict):
            for key in ("sha256", "sha-256", "sha_256"):
                if key in hashes and isinstance(hashes[key], str) and hashes[key].strip():
                    return hashes[key].strip()
        for key in ("sha256", "sha-256", "sha_256"):
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
    return None


def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
    try:
        from urllib.parse import urlparse
        import requests
        parsed = urlparse(libgen_url)
        if 'libgen' not in parsed.netloc.lower():
            return None
        if '/file.php' not in parsed.path.lower():
            return None
        session = requests.Session()
        session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'})
        debug(f"Following LibGen redirect chain for: {libgen_url}")
        try:
            response = session.get(libgen_url, timeout=10, allow_redirects=True)
            final_url = response.url
            try:
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.content, 'html.parser')
                for link in soup.find_all('a'):
                    href = link.get('href')
                    if href and 'get.php' in href:
                        return urljoin(libgen_url, href)
            except ImportError:
                pass
            if final_url != libgen_url:
                debug(f"LibGen resolved to mirror: {final_url}")
                return final_url
        except requests.RequestException as e:
            log(f"Error following LibGen redirects: {e}", file=sys.stderr)
            try:
                response = session.head(libgen_url, allow_redirects=True, timeout=10)
                if response.url != libgen_url:
                    return response.url
            except:
                pass
        return None
    except Exception as e:
        log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
        return None


def _progress_callback(status: Dict[str, Any]) -> None:
    """Simple progress callback using logger."""
    event = status.get("status")
    if event == "downloading":
        percent = status.get("_percent_str", "?")
        speed = status.get("_speed_str", "?")
        eta = status.get("_eta_str", "?")
        sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta}   ")
        sys.stdout.flush()
    elif event == "finished":
        sys.stdout.write("\r" + " " * 70 + "\r")
        sys.stdout.flush()
        debug(f"✓ Download finished: {status.get('filename')}")
    elif event in ("postprocessing", "processing"):
        debug(f"Post-processing: {status.get('postprocessor')}")


def _download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) without yt-dlp.

    The target filename is taken, in decreasing priority, from the
    Content-Disposition header of a HEAD request, a ``filename``/``download``/
    ``file``/``name`` query parameter, or the last segment of the URL path.
    Because all of these are supplied by the remote side, only the final path
    component is kept so the file is always written directly inside
    *output_dir*. Names without a dot fall back to ``downloaded_file.bin``.

    Args:
        url: Direct URL of the file.
        output_dir: Directory the file is written to (created if needed).
        debug_logger: Optional logger that receives a record for the
            completed download or for any failure.
        quiet: When true, suppresses progress and informational output.

    Returns:
        DownloadMediaResult with the file path, a minimal info dict, the
        extracted tags, the source URL and the file's SHA-256 (None if
        hashing failed).

    Raises:
        DownloadError: If the download fails for any reason.
    """
    ensure_directory(output_dir)

    from urllib.parse import unquote, urlparse, parse_qs
    import re

    # Extract filename from URL
    parsed_url = urlparse(url)
    url_path = parsed_url.path

    # Try to get filename from query parameters first (for LibGen and similar services)
    # e.g., ?filename=Book+Title.pdf or &download=filename.pdf
    filename = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ('filename', 'download', 'file', 'name'):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    # If not found in query params, extract from URL path
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    # Remove query strings from filename if any
    if "?" in filename:
        filename = filename.split("?")[0]

    # Try to get real filename from Content-Disposition header (HEAD request)
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            if content_disposition:
                # Extract filename from Content-Disposition header
                # Format: attachment; filename="filename.pdf" or filename=filename.pdf
                # NOTE(review): for an RFC 5987 `filename*=charset''value`
                # parameter this captures the value including its charset prefix.
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as e:
        if not quiet:
            log(f"Could not get filename from headers: {e}", file=sys.stderr)

    # The name comes from the URL or the server and is therefore untrusted:
    # keep only the last path component (either separator style) so values
    # such as "../../x" or "/etc/x" cannot escape output_dir.
    if filename:
        filename = re.split(r"[\\/]", filename)[-1]
        if filename in (".", ".."):
            filename = ""

    # Fallback if we still don't have a good filename
    if not filename or "." not in filename:
        filename = "downloaded_file.bin"

    file_path = output_dir / filename
    # Last line of defence (e.g. a Windows drive-qualified name like "C:x.pdf"
    # replaces the directory when joined): never write outside output_dir.
    if file_path.parent != output_dir:
        filename = "downloaded_file.bin"
        file_path = output_dir / filename
    progress_bar = ProgressBar()

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        # Single-element lists let the nested callback update these values.
        downloaded_bytes = [0]
        total_bytes = [0]
        last_progress_time = [start_time]

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = bytes_downloaded
            total_bytes[0] = content_length

            now = time.time()
            # Report at most every 0.5s, and only when the total size is known.
            if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
                elapsed = now - start_time
                percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
                speed = bytes_downloaded / elapsed if elapsed > 0 else 0
                eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0

                speed_str = progress_bar.format_bytes(speed) + "/s"
                minutes, seconds = divmod(int(eta_seconds), 60)
                hours, minutes = divmod(minutes, 60)
                eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

                progress_line = progress_bar.format_progress(
                    percent_str=f"{percent:.1f}%",
                    downloaded=bytes_downloaded,
                    total=content_length,
                    speed_str=speed_str,
                    eta_str=eta_str,
                )
                if not quiet:
                    debug(progress_line)
                last_progress_time[0] = now

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time
        avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")

        # For direct file downloads, create minimal info dict without filename as title
        # This prevents creating duplicate title: tags when filename gets auto-generated
        # We'll add title back later only if we couldn't extract meaningful tags
        info = {
            "id": filename.rsplit(".", 1)[0],
            "ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
            "webpage_url": url,
        }

        # Hashing is best-effort; a failure leaves hash_value as None.
        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass

        tags = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as e:
                log(f"Error extracting tags: {e}", file=sys.stderr)

        # Only use filename as a title tag if we couldn't extract any meaningful tags
        # This prevents duplicate title: tags when the filename could be mistaken for metadata
        if not any(t.startswith('title:') for t in tags):
            # Re-extract tags with filename as title only if needed
            info['title'] = filename
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as e:
                    log(f"Error extracting tags with filename: {e}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc
    except Exception as exc:
        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc


def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
    """Probe URL to extract metadata WITHOUT downloading.

    The probe runs in a worker thread so that it can be abandoned after
    *timeout_seconds*. The worker is a daemon thread: a probe that is still
    running after the timeout keeps going in the background but does not
    keep the interpreter alive at exit.

    Args:
        url: URL to probe
        no_playlist: If True, ignore playlists and probe only the single video
        timeout_seconds: Max seconds to wait for probe (default 15s)

    Returns:
        Dict with keys: extractor, title, entries (if playlist), duration,
        uploader, description, url.
        Returns None if the URL is not supported by yt-dlp, the probe times
        out, the probe raises, or yt-dlp returns something other than a dict.
    """
    if not is_url_supported_by_ytdlp(url):
        return None

    # Wrap probe in timeout to prevent hanging on large playlists
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_probe() -> None:
        try:
            _ensure_yt_dlp_ready()

            assert yt_dlp is not None
            # Extract info without downloading
            # Use extract_flat='in_playlist' to get full metadata for playlist items
            ydl_opts = {
                "quiet": True,  # Suppress all output
                "no_warnings": True,
                "socket_timeout": 10,
                "retries": 2,  # Reduce retries for faster timeout
                "skip_download": True,  # Don't actually download
                "extract_flat": "in_playlist",  # Get playlist with metadata for each entry
                "noprogress": True,  # No progress bars
            }

            # Add cookies if available (lazy import to avoid circular dependency)
            from hydrus_health_check import get_cookies_file_path  # local import

            global_cookies = get_cookies_file_path()
            if global_cookies:
                ydl_opts["cookiefile"] = global_cookies

            # Add no_playlist option if specified
            if no_playlist:
                ydl_opts["noplaylist"] = True

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(url, download=False)

            if not isinstance(info, dict):
                result_container[0] = None
                return

            # Extract relevant fields
            result_container[0] = {
                "extractor": info.get("extractor", ""),
                "title": info.get("title", ""),
                "entries": info.get("entries", []),  # Will be populated if playlist
                "duration": info.get("duration"),
                "uploader": info.get("uploader"),
                "description": info.get("description"),
                "url": url,
            }
        except Exception as exc:
            log(f"Probe error for {url}: {exc}")
            result_container[1] = exc

    # daemon=True: an abandoned (timed-out) probe must not block process exit.
    thread = threading.Thread(target=_do_probe, daemon=True)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Probe timed out - return None to fall back to direct download
        debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
        return None

    if result_container[1] is not None:
        # Probe error - return None to proceed anyway
        return None

    return cast(Optional[Dict[str, Any]], result_container[0])


def download_media(
    opts: DownloadOptions,
    *,
    debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
    """Download media from URL using yt-dlp or direct HTTP download.

    Routing, in order:
      1. LibGen ``get.php`` / ``file.php`` URLs go straight to
         ``_download_direct_file`` (``file.php`` is first resolved to a mirror
         and ``opts.url`` is updated to the resolved address).
      2. GoFile URLs are rejected with ``DownloadError``.
      3. URLs not recognised by yt-dlp, or whose probe returns ``None``, fall
         back to ``_download_direct_file``.
      4. Everything else is downloaded with yt-dlp: through the CLI helper when
         ``download_sections`` are requested, otherwise through the Python API.

    Args:
        opts: DownloadOptions with url, mode, output_dir, etc.
        debug_logger: Optional debug logger for troubleshooting

    Returns:
        DownloadMediaResult with path, info, tags, hash

    Raises:
        DownloadError: If download fails
    """
    # Handle LibGen url specially
    # file.php redirects to mirrors, get.php is direct from modern API
    url_lower = opts.url.lower()
    if 'libgen' in url_lower:
        if '/get.php' in url_lower:
            # Modern API get.php links are direct downloads from mirrors (not file redirects)
            if not opts.quiet:
                log("Detected LibGen get.php URL, downloading directly...")
            if debug_logger is not None:
                debug_logger.write_record("libgen-direct", {"url": opts.url})
            return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
        elif '/file.php' in url_lower:
            # Old-style file.php redirects to mirrors, we need to resolve
            if not opts.quiet:
                log("Detected LibGen file.php URL, resolving to actual mirror...")
            # Remember the requested URL before opts.url is overwritten so the
            # debug record really contains both the original and resolved values.
            original_url = opts.url
            actual_url = _get_libgen_download_url(original_url)
            if actual_url and actual_url != original_url:
                if not opts.quiet:
                    log(f"Resolved LibGen URL to mirror: {actual_url}")
                opts.url = actual_url
                # After resolution, this will typically be an onion link or direct file
                # Skip yt-dlp for this (it won't support onion/mirrors), go direct
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolved", {"original": original_url, "resolved": actual_url})
                return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
            else:
                if not opts.quiet:
                    log("Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
                return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)

    # GoFile shares are explicitly rejected before any yt-dlp/direct attempt
    try:
        netloc = urlparse(opts.url).netloc.lower()
    except Exception:
        netloc = ""
    if "gofile.io" in netloc:
        msg = "GoFile links are currently unsupported"
        if not opts.quiet:
            debug(msg)
        if debug_logger is not None:
            debug_logger.write_record("gofile-unsupported", {"url": opts.url})
        raise DownloadError(msg)

    # Determine if yt-dlp should be used
    ytdlp_supported = is_url_supported_by_ytdlp(opts.url)
    if ytdlp_supported:
        # Skip probe for playlists with item selection (probe can hang on large playlists)
        # Just proceed straight to download which will handle item selection
        if opts.playlist_items:
            debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
            probe_result = {"url": opts.url}  # Minimal probe result
        else:
            probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15)

        if probe_result is None:
            if not opts.quiet:
                log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}")
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url})
            return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
    else:
        if not opts.quiet:
            log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
        if debug_logger is not None:
            debug_logger.write_record("direct-file-attempt", {"url": opts.url})
        return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)

    _ensure_yt_dlp_ready()

    ytdl_options = _build_ytdlp_options(opts)
    if not opts.quiet:
        debug(f"Starting yt-dlp download: {opts.url}")
    if debug_logger is not None:
        debug_logger.write_record("ytdlp-start", {"url": opts.url})

    assert yt_dlp is not None
    try:
        # Debug: show what options we're using
        if not opts.quiet:
            if ytdl_options.get("download_sections"):
                debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}")
            debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")

        # Use subprocess when download_sections are present (Python API doesn't support them properly)
        session_id = None
        first_section_info = {}
        if ytdl_options.get("download_sections"):
            session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []), quiet=opts.quiet)
            info = None
        else:
            with yt_dlp.YoutubeDL(ytdl_options) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(opts.url, download=True)
    except Exception as exc:
        log(f"yt-dlp failed: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "yt-dlp",
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError("yt-dlp download failed") from exc

    # If we used subprocess (or the API returned nothing), find the file manually
    if info is None:
        try:
            # Wait a moment to ensure files are fully written
            time.sleep(0.5)

            # Regular files in output_dir, newest first. Directories are skipped:
            # a sub-directory can never be the downloaded media and could not be
            # hashed further below.
            files = sorted(
                (p for p in opts.output_dir.iterdir() if p.is_file()),
                key=lambda p: p.stat().st_mtime,
                reverse=True,
            )
            if not files:
                raise FileNotFoundError(f"No files found in {opts.output_dir}")

            # If we downloaded sections, look for files with the session_id pattern
            if opts.clip_sections and session_id:
                # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
                section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
                matching_files = [f for f in files if section_pattern.search(f.name)]

                if matching_files:
                    # Sort by section number to ensure correct order
                    def extract_section_num(path: Path) -> int:
                        match = section_pattern.search(path.name)
                        return int(match.group(1)) if match else 999

                    matching_files.sort(key=extract_section_num)
                    debug(f"Found {len(matching_files)} section file(s) matching pattern")

                    # Now rename section files to use hash-based names
                    # This ensures unique filenames for each section content
                    renamed_files = []

                    for section_file in matching_files:
                        try:
                            # Calculate hash for the file
                            file_hash = sha256_file(section_file)
                            ext = section_file.suffix
                            new_name = f"{file_hash}{ext}"
                            new_path = opts.output_dir / new_name

                            if new_path.exists() and new_path != section_file:
                                # If file with same hash exists, use it and delete the temp one
                                debug(f"File with hash {file_hash} already exists, using existing file.")
                                try:
                                    section_file.unlink()
                                except OSError:
                                    pass
                                renamed_files.append(new_path)
                            else:
                                section_file.rename(new_path)
                                debug(f"Renamed section file: {section_file.name} → {new_name}")
                                renamed_files.append(new_path)
                        except Exception as e:
                            # Best effort: keep the un-renamed file in the result
                            debug(f"Failed to process section file {section_file.name}: {e}")
                            renamed_files.append(section_file)

                    media_path = renamed_files[0]
                    media_paths = renamed_files
                    if not opts.quiet:
                        debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
                else:
                    # Fallback to most recent file if pattern not found
                    media_path = files[0]
                    media_paths = None
                    if not opts.quiet:
                        debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
            else:
                # No sections, just take the most recent file
                media_path = files[0]
                media_paths = None

            if not opts.quiet:
                debug(f"✓ Downloaded: {media_path.name}")
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)})
        except Exception as exc:
            log(f"Error finding downloaded file: {exc}", file=sys.stderr)
            if debug_logger is not None:
                debug_logger.write_record(
                    "exception",
                    {"phase": "find-file", "error": str(exc)},
                )
            raise DownloadError(str(exc)) from exc

        # Create result with minimal data extracted from filename
        file_hash = sha256_file(media_path)

        # For section downloads, create tags with the title and build proper info dict
        tags = []
        title = ''
        if first_section_info:
            title = first_section_info.get('title', '')
            if title:
                tags.append(f'title:{title}')
                debug(f"Added title tag for section download: {title}")

        # Build info dict - always use extracted title if available, not hash
        if first_section_info:
            section_info = first_section_info
        else:
            section_info = {
                "id": media_path.stem,
                "title": title or media_path.stem,
                "ext": media_path.suffix.lstrip(".")
            }

        return DownloadMediaResult(
            path=media_path,
            info=section_info,
            tag=tags,
            source_url=opts.url,
            hash_value=file_hash,
            paths=media_paths,  # Include all section files if present
        )

    if not isinstance(info, dict):
        log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
        raise DownloadError("Unexpected yt-dlp response type")

    info_dict: Dict[str, Any] = info
    if debug_logger is not None:
        debug_logger.write_record(
            "ytdlp-info",
            {
                "keys": sorted(info_dict.keys()),
                "is_playlist": bool(info_dict.get("entries")),
            },
        )

    try:
        entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
    except FileNotFoundError as exc:
        log(f"Error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "resolve-path", "error": str(exc)},
            )
        raise DownloadError(str(exc)) from exc

    if debug_logger is not None:
        debug_logger.write_record(
            "resolved-media",
            {"path": str(media_path), "entry_keys": sorted(entry.keys())},
        )

    # Extract hash from metadata or compute; a hashing failure is only recorded,
    # leaving hash_value empty for the caller to handle
    hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
    if not hash_value:
        try:
            hash_value = sha256_file(media_path)
        except OSError as exc:
            if debug_logger is not None:
                debug_logger.write_record(
                    "hash-error",
                    {"path": str(media_path), "error": str(exc)},
                )

    # Extract tags using metadata.py (optional import; may be None)
    tags = []
    if extract_ytdlp_tags:
        try:
            tags = extract_ytdlp_tags(entry)
        except Exception as e:
            log(f"Error extracting tags: {e}", file=sys.stderr)

    source_url = (
        entry.get("webpage_url")
        or entry.get("original_url")
        or entry.get("url")
    )

    if not opts.quiet:
        debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
    if debug_logger is not None:
        debug_logger.write_record(
            "downloaded",
            {
                "path": str(media_path),
                "tag_count": len(tags),
                "source_url": source_url,
                "sha256": hash_value,
            },
        )

    return DownloadMediaResult(
        path=media_path,
        info=entry,
        tag=tags,
        source_url=source_url,
        hash_value=hash_value,
    )


# Timeout handler to prevent yt-dlp hangs
def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any:
    """Run ``download_media(opts)`` on a worker thread, bounded by a wait limit.

    The caller waits at most ``timeout_seconds`` for the worker. When the wait
    expires the worker is not stopped; this function simply stops waiting and
    raises.

    Args:
        opts: DownloadOptions forwarded to ``download_media``
        timeout_seconds: Max seconds to wait (default 300s = 5 min)

    Returns:
        Whatever ``download_media`` returned (a DownloadMediaResult)

    Raises:
        DownloadError: If the wait timed out or the worker produced no result
        Exception: Any exception raised inside ``download_media`` is re-raised
    """
    import threading
    from typing import cast

    # Slot 0 receives the result, slot 1 the exception raised by the worker.
    outcome: List[Optional[Any]] = [None, None]

    def _worker() -> None:
        try:
            outcome[0] = download_media(opts)
        except Exception as err:
            outcome[1] = err

    worker = threading.Thread(target=_worker, daemon=False)
    worker.start()
    worker.join(timeout=timeout_seconds)

    # Still running after join() returned means the wait limit was hit.
    if worker.is_alive():
        raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}")

    value, failure = outcome
    if failure is not None:
        raise cast(Exception, failure)
    if value is None:
        raise DownloadError(f"Download failed for {opts.url}")
    return cast(Any, value)


class Download_Media(Cmdlet):
    """Class-based download-media cmdlet - yt-dlp only, streaming sites."""

    def __init__(self) -> None:
        """Declare the download-media cmdlet's metadata and arguments, then register it."""
        # Argument schema: the shared URL argument plus the cmdlet-specific options.
        cmdlet_args = [
            SharedArgs.URL,
            CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
            CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
            CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
            CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
        ]
        help_lines = [
            "Download media from streaming sites using yt-dlp.",
            "For direct file downloads, use download-file.",
        ]
        super().__init__(
            name="download-media",
            summary="Download media from streaming sites (YouTube, Twitch, etc.)",
            usage="download-media <url> [options] or search-file | download-media [options]",
            alias=[""],
            arg=cmdlet_args,
            detail=help_lines,
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Cmdlet entry point: flag quiet output for multi-stage pipelines, then delegate.

        When the current stage context reports more than one stage and ``config``
        is a dict, ``config["_quiet_background_output"]`` is set to True before
        handing off to ``_run_impl``. Returns the exit code from ``_run_impl``.
        """
        ctx = pipeline_context.get_stage_context()
        if ctx is not None and getattr(ctx, "total_stages", 1) > 1 and isinstance(config, dict):
            config["_quiet_background_output"] = True
        return self._run_impl(result, args, config)

    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main download implementation for yt-dlp-supported url.

        Flow:
          1. Collect URLs from the parsed ``url`` argument, or from the ``url`` /
             ``target`` fields of the piped ``result`` when none were given.
          2. Keep only URLs accepted by ``is_url_supported_by_ytdlp``.
          3. For a single URL in video mode with no -clip, -item or -format,
             list the formats, publish a selection table and return without
             downloading (only when more than one format is available).
          4. Otherwise download each URL through ``_download_with_timeout``,
             emit the resulting pipe object and register its URL.

        Args:
            result: Piped input (single item or list) used as a URL source.
            args: Raw command-line arguments for this cmdlet.
            config: Runtime configuration; ``_quiet_background_output`` is read.

        Returns:
            0 when a format table was shown or at least one download succeeded,
            1 otherwise (including any unexpected error, which is logged).
        """
        try:
            debug("Starting download-media")

            # Parse arguments
            parsed = parse_cmdlet_args(args, self)

            # Normalise the url argument to a fresh list: tolerates a missing/None
            # value or a bare string, and avoids appending to the parser's own list.
            raw_url = parsed.get("url") or []
            if isinstance(raw_url, str):
                raw_url = [raw_url]
            else:
                raw_url = list(raw_url)

            # If no url provided via args, try to extract from piped result
            if not raw_url and result:
                from ._shared import get_field
                # Handle single result or list of results
                results_to_check = result if isinstance(result, list) else [result]
                for item in results_to_check:
                    # Try to get URL from various possible fields
                    url = get_field(item, "url") or get_field(item, "target")
                    if url:
                        raw_url.append(url)

            # Filter to yt-dlp supported url only
            supported_url = [
                url for url in raw_url
                if is_url_supported_by_ytdlp(url)
            ]

            if not supported_url:
                log("No yt-dlp-supported url to download", file=sys.stderr)
                return 1

            # Log unsupported url if any
            unsupported = set(raw_url) - set(supported_url)
            if unsupported:
                debug(f"Skipping {len(unsupported)} unsupported url (use download-file for direct downloads)")

            # Get output directory
            final_output_dir = self._resolve_output_dir(parsed, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            # Get other options
            clip_spec = parsed.get("clip")

            mode = "audio" if parsed.get("audio") else "video"

            # Parse clip range if specified
            clip_range = None
            if clip_spec:
                clip_range = self._parse_time_range(clip_spec)
                if not clip_range:
                    log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
                    return 1

            # Check if we need to show format selection
            playlist_items = str(parsed.get("item")) if parsed.get("item") else None
            ytdl_format = parsed.get("format")

            # If no -item, no explicit -format specified, and single URL, show the format table.
            # Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
            if (
                mode != "audio"
                and not clip_spec
                and not playlist_items
                and not ytdl_format
                and len(supported_url) == 1
            ):
                url = supported_url[0]
                formats = list_formats(url, no_playlist=False)

                if formats and len(formats) > 1:
                    # Filter formats: multiple videos (640x+, one per resolution tier) + 1 best audio
                    video_formats = []
                    audio_formats = []

                    for fmt in formats:
                        width = fmt.get("width") or 0
                        vcodec = fmt.get("vcodec", "none")
                        acodec = fmt.get("acodec", "none")

                        # Classify as video or audio
                        if vcodec != "none" and acodec == "none" and width >= 640:
                            video_formats.append(fmt)
                        elif acodec != "none" and vcodec == "none":
                            audio_formats.append(fmt)

                    # Group videos by resolution and select best format per resolution
                    filtered_formats = []
                    if video_formats:
                        # Group by height (resolution tier)
                        from collections import defaultdict
                        by_resolution = defaultdict(list)
                        for f in video_formats:
                            by_resolution[f.get("height") or 0].append(f)

                        # For each resolution, prefer AV1, then highest bitrate
                        for height in sorted(by_resolution.keys(), reverse=True):
                            candidates = by_resolution[height]
                            # "or ''" guards against a vcodec key that is present
                            # but None, which would make the "in" test raise.
                            av1_formats = [f for f in candidates if "av01" in (f.get("vcodec") or "")]
                            pool = av1_formats if av1_formats else candidates
                            best = max(pool, key=lambda f: f.get("tbr") or 0)
                            filtered_formats.append(best)

                    # Select best audio: highest bitrate (any format)
                    if audio_formats:
                        best_audio = max(audio_formats, key=lambda f: f.get("tbr") or f.get("abr") or 0)
                        filtered_formats.append(best_audio)

                    if not filtered_formats:
                        # Fallback to all formats if filtering resulted in nothing
                        filtered_formats = formats

                    debug(f"Filtered to {len(filtered_formats)} formats from {len(formats)} total")

                    # Show format selection table
                    log(f"Available formats for {url}:", file=sys.stderr)
                    log("", file=sys.stderr)

                    # Build the base command that will be replayed with @N selection
                    # Include any additional non-flag args from the original command
                    base_cmd = f'download-media "{url}"'
                    remaining_args = [arg for arg in args if arg != url and not arg.startswith('-')]
                    if remaining_args:
                        base_cmd += ' ' + ' '.join(remaining_args)

                    # Create result table for display
                    table = ResultTable()
                    table.title = f"Available formats for {url}"
                    table.set_source_command("download-media", [url])

                    # Collect results for table
                    results_list = []

                    # Build one row per format for selection
                    for idx, fmt in enumerate(filtered_formats, 1):
                        resolution = fmt.get("resolution", "")
                        ext = fmt.get("ext", "")
                        vcodec = fmt.get("vcodec", "none")
                        acodec = fmt.get("acodec", "none")
                        filesize = fmt.get("filesize")
                        format_id = fmt.get("format_id", "")

                        # Format size
                        size_str = ""
                        if filesize:
                            size_mb = filesize / (1024 * 1024)
                            size_str = f"{size_mb:.1f}MB"

                        # Build format description
                        desc_parts = []
                        if resolution and resolution != "audio only":
                            desc_parts.append(resolution)
                        if ext:
                            desc_parts.append(ext.upper())
                        if vcodec != "none":
                            desc_parts.append(f"v:{vcodec}")
                        if acodec != "none":
                            desc_parts.append(f"a:{acodec}")
                        if size_str:
                            desc_parts.append(size_str)

                        format_desc = " | ".join(desc_parts)

                        # Build format dict for emission and table
                        format_dict = {
                            "table": "download-media",
                            "title": f"Format {format_id}",
                            "url": url,
                            "target": url,
                            "detail": format_desc,
                            "annotations": [ext, resolution] if resolution else [ext],
                            "media_kind": "format",
                            "cmd": base_cmd,
                            "columns": [
                                ("#", str(idx)),
                                ("ID", format_id),
                                ("Resolution", resolution or "N/A"),
                                ("Ext", ext),
                                ("Video", vcodec),
                                ("Audio", acodec),
                                ("Size", size_str or "N/A"),
                            ],
                            "full_metadata": {
                                "format_id": format_id,
                                "url": url,
                                "item_selector": format_id,
                            },
                            "_selection_args": ["-format", format_id]
                        }

                        # Add to results list and table (don't emit - formats should wait for @N selection)
                        results_list.append(format_dict)
                        table.add_result(format_dict)

                    # Set the result table so it displays and is available for @N selection
                    pipeline_context.set_current_stage_table(table)
                    pipeline_context.set_last_result_table(table, results_list)

                    log("", file=sys.stderr)
                    log("Use: @N | download-media to select and download format", file=sys.stderr)
                    return 0

            # Download each URL
            downloaded_count = 0
            clip_sections_spec = self._build_clip_sections_spec(clip_range)
            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

            # If playlist_items is specified but looks like a format ID (e.g. from table selection),
            # treat it as a format selector instead of playlist items.
            # This handles the case where @N selection passes -item <format_id>.
            # Heuristic: anything other than digits, commas and dashes is likely a
            # format ID (e.g. '140-drc', 'best', '137+140'). The decision does not
            # depend on the URL, so it is made once before the loop.
            base_format = ytdl_format
            base_playlist_items = playlist_items
            if playlist_items and not ytdl_format and re.search(r'[^0-9,-]', playlist_items):
                base_format = playlist_items
                base_playlist_items = None

            for url in supported_url:
                try:
                    debug(f"Processing: {url}")

                    actual_format = base_format
                    actual_playlist_items = base_playlist_items

                    # Auto-pick best audio format when -audio is used and no explicit format is given.
                    if mode == "audio" and not actual_format:
                        chosen = None
                        formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
                        if formats:
                            chosen = _pick_best_audio_format_id(formats)
                        actual_format = chosen or "bestaudio/best"

                    opts = DownloadOptions(
                        url=url,
                        mode=mode,
                        output_dir=final_output_dir,
                        ytdl_format=actual_format,
                        clip_sections=clip_sections_spec,
                        playlist_items=actual_playlist_items,
                        quiet=quiet_mode,
                        no_playlist=False,
                    )

                    # Use timeout wrapper to prevent hanging
                    debug("Starting download with 5-minute timeout...")
                    result_obj = _download_with_timeout(opts, timeout_seconds=300)
                    debug("Download completed, building pipe object...")
                    pipe_obj_dict = self._build_pipe_object(result_obj, url, opts)
                    debug("Emitting result to pipeline...")
                    pipeline_context.emit(pipe_obj_dict)

                    # Automatically register url with local library
                    if pipe_obj_dict.get("url"):
                        pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
                        register_url_with_local_library(pipe_obj, config)

                    downloaded_count += 1
                    debug("✓ Downloaded and emitted")

                except DownloadError as e:
                    # A failed URL is reported and skipped; the remaining URLs still run
                    log(f"Download failed for {url}: {e}", file=sys.stderr)
                except Exception as e:
                    log(f"Error processing {url}: {e}", file=sys.stderr)

            if downloaded_count > 0:
                debug(f"✓ Successfully processed {downloaded_count} URL(s)")
                return 0

            log("No downloads completed", file=sys.stderr)
            return 1

        except Exception as e:
            log(f"Error in download-media: {e}", file=sys.stderr)
            return 1

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config."""
        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
        if storage_location:
            try:
                return SharedArgs.resolve_storage(storage_location)
            except Exception as e:
                log(f"Invalid storage location: {e}", file=sys.stderr)
                return None

        # Priority 2: Config outfile
        if config and config.get("outfile"):
            try:
                return Path(config["outfile"]).expanduser()
            except Exception:
                pass

        # Priority 3: Default (home/Videos)
        final_output_dir = Path.home() / "Videos"
        debug(f"Using default directory: {final_output_dir}")

        # Ensure directory exists
        try:
            final_output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
            return None

        return final_output_dir

    def _parse_time_range(self, spec: str) -> Optional[tuple]:
        """Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds)."""
        try:
            parts = spec.split("-")
            if len(parts) != 2:
                return None
            
            def to_seconds(ts: str) -> int:
                ts = ts.strip()
                if ":" in ts:
                    mm, ss = ts.split(":")
                    return int(mm) * 60 + int(ss)
                return int(ts)
            
            start = to_seconds(parts[0])
            end = to_seconds(parts[1])
            return (start, end) if start < end else None
        except Exception:
            return None

    def _build_clip_sections_spec(
        self,
        clip_range: Optional[tuple],
    ) -> Optional[str]:
        """Convert parsed clip range into downloader spec (seconds)."""
        ranges: List[str] = []
        if clip_range:
            ranges.append(f"{clip_range[0]}-{clip_range[1]}")
        return ",".join(ranges) if ranges else None

    def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
        """Create a PipeObject-compatible dict from a DownloadMediaResult."""
        info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
        media_path = Path(download_result.path)
        hash_value = download_result.hash_value or self._compute_file_hash(media_path)
        title = info.get("title") or media_path.stem
        tag = list(download_result.tag or [])
        
        # Add title tag for searchability
        if title and f"title:{title}" not in tag:
            tag.insert(0, f"title:{title}")

        # Build a single canonical URL field; prefer yt-dlp provided webpage_url or info.url,
        # but fall back to the original requested URL.  If multiple unique urls are available,
        # join them into a comma-separated string.
        urls_to_consider: List[str] = []
        try:
            page_url = info.get("webpage_url") or info.get("url")
            if page_url:
                urls_to_consider.append(str(page_url))
        except Exception:
            pass
        if url:
            urls_to_consider.append(str(url))

        seen_urls: List[str] = []
        for u in urls_to_consider:
            if u and u not in seen_urls:
                seen_urls.append(u)
        final_url = ",".join(seen_urls) if seen_urls else None

        # Construct canonical PipeObject dict: hash, store, path, url, title, tags
        # Prefer explicit backend names (storage_name/storage_location). If none, default to PATH
        # which indicates the file is available at a filesystem path and hasn't been added to a backend yet.
        return {
            "path": str(media_path),
            "hash": hash_value,
            "title": title,
            "url": final_url,
            "tag": tag,
            "action": "cmdlet:download-media",
            # download_mode removed (deprecated), keep media_kind
            "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
            "media_kind": "video" if opts.mode == "video" else "audio",
        }

    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute SHA256 hash of a file."""
        import hashlib
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()


# Module-level singleton: a single Download_Media instance is created when this
# module is imported and bound to CMDLET.
# NOTE(review): presumably the cmdlet loader looks this name up to register the
# command — confirm against the loader.
CMDLET = Download_Media()