# Medios-Macina/cmdlet/download_file.py

"""Generic file downloader.

Supports:
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
- Piped provider items (uses provider.download when available)

No streaming site logic; use download-media for yt-dlp/streaming.
"""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from urllib.parse import urlparse

from SYS.download import DownloadError, _download_direct_file
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
import pipeline as pipeline_context

from . import _shared as sh

# Short module-level aliases for the names this module uses from `_shared`,
# so the code below can refer to them without the `sh.` prefix.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field


class Download_File(Cmdlet):
    """Class-based download-file cmdlet - direct HTTP downloads."""

    def __init__(self) -> None:
        """Describe the download-file cmdlet (name, args, help) and register it."""
        # -path is the preferred output-directory flag because it matches the
        # other cmdlets; -output only survives for backwards compatibility.
        legacy_output_arg = CmdletArg(
            name="-output",
            type="string",
            alias="o",
            description="(deprecated) Output directory (use -path instead)",
        )
        help_lines = [
            "Download files directly via HTTP without yt-dlp processing.",
            "For streaming sites, use download-media.",
            "For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.",
        ]

        super().__init__(
            name="download-file",
            summary="Download files via HTTP or provider handlers",
            usage="download-file <url> [-path DIR] [options] OR @N | download-file [-path DIR] [options]",
            alias=["dl-file", "download-http"],
            arg=[SharedArgs.URL, SharedArgs.PATH, legacy_output_arg],
            detail=help_lines,
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution method."""
        return self._run_impl(result, args, config)

    @staticmethod
    def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
        raw_url = parsed.get("url", [])
        if isinstance(raw_url, str):
            raw_url = [raw_url]

        expanded_urls: List[str] = []
        for u in (raw_url or []):
            if u is None:
                continue
            s = str(u).strip()
            if not s:
                continue
            if "," in s:
                parts = [p.strip() for p in s.split(",")]
                expanded_urls.extend([p for p in parts if p])
            else:
                expanded_urls.append(s)

        return expanded_urls

    @staticmethod
    def _collect_piped_items_if_no_urls(result: Any, raw_urls: Sequence[str]) -> List[Any]:
        if raw_urls:
            return []
        if isinstance(result, list):
            return list(result)
        if result:
            return [result]
        return []

    @staticmethod
    def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int:
        try:
            return int(len(raw_urls or []) + len(piped_items or []))
        except Exception:
            return 1

    @staticmethod
    def _build_preview(raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[Any]:
        try:
            preview: List[Any] = []
            preview.extend(list(raw_urls or [])[: max(0, total_items)])
            if len(preview) < total_items:
                preview.extend(list(piped_items or [])[: max(0, total_items - len(preview))])
            return preview
        except Exception:
            return []

    @staticmethod
    def _load_provider_registry() -> Dict[str, Any]:
        try:
            from ProviderCore.registry import (
                get_search_provider as _get_search_provider,
                get_provider as _get_provider,
                match_provider_name_for_url as _match_provider_name_for_url,
                SearchResult as _SearchResult,
            )

            return {
                "get_search_provider": _get_search_provider,
                "get_provider": _get_provider,
                "match_provider_name_for_url": _match_provider_name_for_url,
                "SearchResult": _SearchResult,
            }

        except Exception:
            return {
                "get_search_provider": None,
                "get_provider": None,
                "match_provider_name_for_url": None,
                "SearchResult": None,
            }

    @staticmethod
    def _maybe_show_internetarchive_formats(
        *,
        raw_urls: Sequence[str],
        piped_items: Sequence[Any],
        parsed: Dict[str, Any],
        config: Dict[str, Any],
        quiet_mode: bool,
    ) -> Optional[int]:
        """If the input is an IA item page, show a selectable formats table.

        Returns an exit code when handled; otherwise None.
        """
        if quiet_mode:
            return None

        try:
            total_inputs = int(len(raw_urls or []) + len(piped_items or []))
        except Exception:
            total_inputs = 0

        if total_inputs != 1:
            return None

        item = piped_items[0] if piped_items else None
        target = ""
        if item is not None:
            try:
                target = str(get_field(item, "path") or get_field(item, "url") or "").strip()
            except Exception:
                target = ""
        if not target and raw_urls:
            target = str(raw_urls[0]).strip()
        if not target:
            return None

        try:
            from Provider import internetarchive as ia
        except Exception:
            return None

        identifier = ""
        try:
            md = get_field(item, "full_metadata") if item is not None else None
            if isinstance(md, dict):
                identifier = str(md.get("identifier") or "").strip()
        except Exception:
            identifier = ""
        if not identifier:
            try:
                identifier = str(ia.extract_identifier(target) or "").strip()
            except Exception:
                identifier = ""
        if not identifier:
            return None

        # Only show picker for item pages (details); direct download URLs should download immediately.
        try:
            if not ia.is_details_url(target):
                return None
        except Exception:
            return None

        try:
            files = ia.list_download_files(identifier)
        except Exception as exc:
            log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
            return 1

        if not files:
            log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
            return 1

        title = ""
        try:
            title = str(get_field(item, "title") or "").strip() if item is not None else ""
        except Exception:
            title = ""
        table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"

        try:
            from result_table import ResultTable
        except Exception as exc:
            log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
            return 1

        base_args: List[str] = []
        out_arg = parsed.get("path") or parsed.get("output")
        if out_arg:
            base_args.extend(["-path", str(out_arg)])

        table = ResultTable(table_title).set_preserve_order(True)
        table.set_table("internetarchive.formats")
        table.set_source_command("download-file", base_args)

        rows: List[Dict[str, Any]] = []
        for f in files:
            name = str(f.get("name") or "").strip()
            if not name:
                continue
            fmt = str(f.get("format") or "").strip()
            src = str(f.get("source") or "").strip()
            direct_url = str(f.get("direct_url") or "").strip()
            if not direct_url:
                continue

            size_val: Any = f.get("size")
            try:
                size_val = int(size_val) if size_val not in (None, "") else ""
            except Exception:
                pass

            row_item: Dict[str, Any] = {
                "table": "internetarchive",
                "title": fmt or name,
                "path": direct_url,
                "url": direct_url,
                "columns": [
                    ("Format", fmt),
                    ("Name", name),
                    ("Size", size_val),
                    ("Source", src),
                ],
                "_selection_args": [direct_url],
                "full_metadata": {
                    "identifier": identifier,
                    "name": name,
                    "format": fmt,
                    "source": src,
                    "size": f.get("size"),
                },
            }
            rows.append(row_item)
            table.add_result(row_item)

        if not rows:
            log("download-file: no downloadable files found for this item", file=sys.stderr)
            return 1

        try:
            pipeline_context.set_last_result_table(table, rows, subject=item)
            pipeline_context.set_current_stage_table(table)
        except Exception:
            pass

        log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
        return 0

    @staticmethod
    def _openlibrary_edition_id_from_url(u: str) -> str:
        try:
            p = urlparse(str(u))
            parts = [x for x in (p.path or "").split("/") if x]
        except Exception:
            parts = []
        # /books/OL35443598M/...
        if len(parts) >= 2 and str(parts[0]).lower() == "books":
            return str(parts[1]).strip()
        return ""

    @staticmethod
    def _title_hint_from_url_slug(u: str) -> str:
        try:
            p = urlparse(str(u))
            parts = [x for x in (p.path or "").split("/") if x]
            slug = parts[-1] if parts else ""
        except Exception:
            slug = ""
        slug = (slug or "").strip().replace("_", " ")
        return slug or "OpenLibrary"

    @staticmethod
    def _path_from_download_result(result_obj: Any) -> Path:
        file_path = None
        if hasattr(result_obj, "path"):
            file_path = getattr(result_obj, "path")
        elif isinstance(result_obj, dict):
            file_path = result_obj.get("path")
        if not file_path:
            file_path = str(result_obj)
        return Path(str(file_path))

    def _emit_local_file(
        self,
        *,
        downloaded_path: Path,
        source: Optional[str],
        title_hint: Optional[str],
        tags_hint: Optional[List[str]],
        media_kind_hint: Optional[str],
        full_metadata: Optional[Dict[str, Any]],
        progress: PipelineProgress,
        config: Dict[str, Any],
        provider_hint: Optional[str] = None,
    ) -> None:
        """Emit a pipeline payload describing one downloaded local file.

        The payload carries the path, file hash, title, tags and media kind,
        plus the optional provider and metadata. ``source`` is stored under
        ``url`` when it starts with ``http`` and under ``source_url``
        otherwise. After emitting, the progress object is notified, and
        payloads that carry a ``url`` are registered with the local library.
        """
        # Title preference: explicit hint, then file stem, then "Unknown";
        # a whitespace-only result falls back to the stem.
        display_title = (title_hint or downloaded_path.stem or "Unknown").strip()
        if not display_title:
            display_title = downloaded_path.stem
        file_hash = self._compute_file_hash(downloaded_path)

        tag_list: List[str] = [str(t) for t in tags_hint if t] if tags_hint else []
        has_title_tag = any(str(entry).lower().startswith("title:") for entry in tag_list)
        if not has_title_tag:
            tag_list.insert(0, f"title:{display_title}")

        payload: Dict[str, Any] = {
            "path": str(downloaded_path),
            "hash": file_hash,
            "title": display_title,
            "action": "cmdlet:download-file",
            "download_mode": "file",
            "store": "local",
            "media_kind": media_kind_hint or "file",
            "tag": tag_list,
        }
        if provider_hint:
            payload["provider"] = str(provider_hint)
        if full_metadata:
            payload["full_metadata"] = full_metadata
        if source:
            source_key = "url" if str(source).startswith("http") else "source_url"
            payload[source_key] = source

        pipeline_context.emit(payload)

        # With a local progress UI (standalone cmdlet) the pipe must be
        # advanced explicitly on emit.
        progress.on_emit(payload)

        # Automatically register the url with the local library.
        if payload.get("url"):
            register_url_with_local_library(coerce_to_pipe_object(payload), config)

    def _process_explicit_urls(
        self,
        *,
        raw_urls: Sequence[str],
        final_output_dir: Path,
        config: Dict[str, Any],
        quiet_mode: bool,
        registry: Dict[str, Any],
        progress: PipelineProgress,
    ) -> tuple[int, Optional[int]]:
        downloaded_count = 0

        SearchResult = registry.get("SearchResult")
        get_provider = registry.get("get_provider")
        match_provider_name_for_url = registry.get("match_provider_name_for_url")

        for url in raw_urls:
            try:
                debug(f"Processing URL: {url}")

                # Telegram message URLs are not direct files; route through the provider.
                try:
                    parsed_url = urlparse(str(url))
                    host = (parsed_url.hostname or "").lower().strip()
                except Exception:
                    host = ""

                is_telegram = host in {"t.me", "telegram.me"} or host.endswith(".t.me")
                if is_telegram and SearchResult:
                    try:
                        from ProviderCore.registry import get_provider as _get_provider
                    except Exception:
                        _get_provider = None

                    if _get_provider is None:
                        raise DownloadError("Telegram provider registry not available")

                    provider = _get_provider("telegram", config)
                    if provider is None:
                        raise DownloadError("Telegram provider not configured or not available (check telethon/app_id/api_hash)")

                    sr = SearchResult(table="telegram", title=str(url), path=str(url), full_metadata={})
                    downloaded_path = None
                    telegram_info: Optional[Dict[str, Any]] = None
                    if hasattr(provider, "download_url"):
                        try:
                            downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir)  # type: ignore[attr-defined]
                        except Exception as exc:
                            raise DownloadError(str(exc))
                    else:
                        downloaded_path = provider.download(sr, final_output_dir)

                    if not downloaded_path:
                        raise DownloadError("Telegram download returned no file")

                    channel = ""
                    post = None
                    if isinstance(telegram_info, dict):
                        try:
                            chat_info_raw = telegram_info.get("chat")
                            msg_info_raw = telegram_info.get("message")
                            chat_info: Dict[str, Any] = chat_info_raw if isinstance(chat_info_raw, dict) else {}
                            msg_info: Dict[str, Any] = msg_info_raw if isinstance(msg_info_raw, dict) else {}
                            channel = str(chat_info.get("title") or chat_info.get("username") or "").strip()
                            post = msg_info.get("id")
                        except Exception:
                            channel = ""
                            post = None

                    title_hint = None
                    tg_tags: List[str] = []
                    if channel:
                        tg_tags.append(f"channel:{channel}")
                    if post is not None:
                        tg_tags.append(f"post:{post}")
                    if channel and post is not None:
                        title_hint = f"{channel} {post}"
                    elif post is not None:
                        title_hint = f"post:{post}"
                    else:
                        title_hint = downloaded_path.stem

                    self._emit_local_file(
                        downloaded_path=downloaded_path,
                        source=str(url),
                        title_hint=title_hint,
                        tags_hint=tg_tags,
                        media_kind_hint="file",
                        full_metadata=telegram_info,
                        provider_hint="telegram",
                        progress=progress,
                        config=config,
                    )
                    downloaded_count += 1
                    debug("✓ Downloaded via Telegram provider and emitted")
                    continue

                # Provider URL routing (e.g. OpenLibrary book pages).
                provider_name = None
                if match_provider_name_for_url is not None:
                    try:
                        provider_name = match_provider_name_for_url(str(url))
                    except Exception:
                        provider_name = None

                # Heuristic: LibGen often uses landing pages like edition.php/file.php.
                # These should never be treated as direct file URLs.
                if not provider_name:
                    try:
                        p = urlparse(str(url))
                        h = (p.hostname or "").strip().lower()
                        path = (p.path or "").strip().lower()
                        if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")):
                            provider_name = "libgen"
                    except Exception:
                        pass

                if provider_name and get_provider is not None and SearchResult is not None:
                    # OpenLibrary URLs should be handled by the OpenLibrary provider.
                    if provider_name == "openlibrary":
                        provider = get_provider("openlibrary", config)
                        if provider is None:
                            raise DownloadError("OpenLibrary provider not configured or not available")

                        edition_id = self._openlibrary_edition_id_from_url(str(url))
                        title_hint = self._title_hint_from_url_slug(str(url))

                        sr = SearchResult(
                            table="openlibrary",
                            title=title_hint,
                            path=str(url),
                            media_kind="book",
                            full_metadata={
                                "openlibrary_id": edition_id,
                            },
                        )

                        downloaded_path = None
                        try:
                            ui, _pipe_idx = progress.ui_and_pipe_index()
                            progress_cb = None
                            if ui is not None:
                                # High-level steps for OpenLibrary borrow/download flow.
                                progress.begin_steps(5)

                                def _progress(kind: str, done: int, total: Optional[int], label: str) -> None:
                                    # kind:
                                    # - "step": advance step text
                                    # - "pages": update pipe percent/status
                                    # - "bytes": update transfer bar
                                    if kind == "step":
                                        progress.step(label)
                                        return

                                    if kind == "pages":
                                        t = int(total) if isinstance(total, int) else 0
                                        d = int(done) if isinstance(done, int) else 0
                                        if t > 0:
                                            pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
                                            progress.set_percent(pct)
                                            progress.set_status(f"downloading pages {d}/{t}")
                                        else:
                                            progress.set_status(f"downloading pages {d}")
                                        return

                                    if kind == "bytes":
                                        try:
                                            lbl = str(label or "download")
                                        except Exception:
                                            lbl = "download"
                                        progress.begin_transfer(label=lbl, total=total)
                                        progress.update_transfer(label=lbl, completed=done, total=total)
                                        try:
                                            if isinstance(total, int) and total > 0 and int(done) >= int(total):
                                                progress.finish_transfer(label=lbl)
                                        except Exception:
                                            pass
                                        return

                                progress_cb = _progress

                            downloaded_path = provider.download(sr, final_output_dir, progress_callback=progress_cb)  # type: ignore[call-arg]
                        except Exception as exc:
                            raise DownloadError(str(exc))

                        # Clear long-running status line after the download attempt.
                        progress.clear_status()

                        if downloaded_path:
                            tags_hint: Optional[List[str]] = None
                            try:
                                sr_tags = getattr(sr, "tag", None)
                                if isinstance(sr_tags, set) and sr_tags:
                                    tags_hint = sorted([str(t) for t in sr_tags if t])
                            except Exception:
                                tags_hint = None

                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=title_hint,
                                tags_hint=tags_hint,
                                media_kind_hint="book",
                                full_metadata=sr.full_metadata,
                                provider_hint="openlibrary",
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                        # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen.
                        try:
                            fallback_query = str(title_hint or "").strip()
                            if fallback_query:
                                log(
                                    f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}",
                                    file=sys.stderr,
                                )
                                from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET

                                exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
                                if callable(exec_fn):
                                    ret = exec_fn(None, ["-provider", "libgen", "-query", fallback_query], config)
                                    try:
                                        table = pipeline_context.get_last_result_table()
                                        items = pipeline_context.get_last_result_items()
                                        if table is not None:
                                            pipeline_context.set_last_result_table_overlay(table, items)
                                    except Exception:
                                        pass

                                    try:
                                        return downloaded_count, int(ret)  # type: ignore[arg-type]
                                    except Exception:
                                        return downloaded_count, 1
                        except Exception:
                            pass

                        log("[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr)
                        continue

                    # Generic provider URL handler (if a provider implements `download_url`).
                    provider = get_provider(provider_name, config)
                    if provider is not None and hasattr(provider, "download_url"):
                        try:
                            downloaded_path = provider.download_url(str(url), final_output_dir)  # type: ignore[attr-defined]
                        except Exception as exc:
                            raise DownloadError(str(exc))

                        if downloaded_path:
                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=Path(str(downloaded_path)).stem,
                                tags_hint=None,
                                media_kind_hint="file",
                                full_metadata=None,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                    # Otherwise, try provider.download(SearchResult) with the URL as the target.
                    if provider is not None:
                        sr_obj = None
                        try:
                            sr_obj = SearchResult(
                                table=str(provider_name),
                                title=str(url),
                                path=str(url),
                                full_metadata={},
                            )
                            downloaded_path = provider.download(sr_obj, final_output_dir)  # type: ignore[call-arg]
                        except Exception:
                            downloaded_path = None

                        # Refuse to fall back to direct-download for LibGen landing pages.
                        # This prevents saving HTML (e.g. edition.php) as a bogus file.
                        if (not downloaded_path) and str(provider_name).lower() == "libgen":
                            raise DownloadError("LibGen URL did not resolve to a downloadable file")

                        if downloaded_path:
                            emit_tags: Optional[List[str]] = None
                            full_md: Optional[Dict[str, Any]] = None
                            title_hint = Path(str(downloaded_path)).stem
                            media_kind_hint = "file"

                            if str(provider_name).lower() == "libgen" and sr_obj is not None:
                                media_kind_hint = "book"
                                try:
                                    sr_tags = getattr(sr_obj, "tag", None)
                                    if isinstance(sr_tags, set) and sr_tags:
                                        emit_tags = sorted([str(t) for t in sr_tags if t])
                                except Exception:
                                    emit_tags = None

                                try:
                                    sr_full_md = getattr(sr_obj, "full_metadata", None)
                                    if isinstance(sr_full_md, dict):
                                        full_md = sr_full_md
                                        t = str(sr_full_md.get("title") or "").strip()
                                        if t:
                                            title_hint = t
                                except Exception:
                                    full_md = None

                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=title_hint,
                                tags_hint=emit_tags,
                                media_kind_hint=media_kind_hint,
                                full_metadata=full_md,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                result_obj = _download_direct_file(str(url), final_output_dir, quiet=quiet_mode)
                downloaded_path = self._path_from_download_result(result_obj)

                self._emit_local_file(
                    downloaded_path=downloaded_path,
                    source=str(url),
                    title_hint=downloaded_path.stem,
                    tags_hint=[f"title:{downloaded_path.stem}"],
                    media_kind_hint="file",
                    full_metadata=None,
                    progress=progress,
                    config=config,
                )
                downloaded_count += 1
                debug("✓ Downloaded and emitted")

            except DownloadError as e:
                log(f"Download failed for {url}: {e}", file=sys.stderr)
            except Exception as e:
                log(f"Error processing {url}: {e}", file=sys.stderr)

        return downloaded_count, None

    def _expand_provider_items(
        self,
        *,
        piped_items: Sequence[Any],
        registry: Dict[str, Any],
        config: Dict[str, Any],
    ) -> List[Any]:
        get_search_provider = registry.get("get_search_provider")
        expanded_items: List[Any] = []
        for item in piped_items:
            try:
                table = get_field(item, "table")
                media_kind = get_field(item, "media_kind")
                full_metadata = get_field(item, "full_metadata")
                target = get_field(item, "path") or get_field(item, "url")

                if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder":
                    magnet_id = None
                    if isinstance(full_metadata, dict):
                        magnet_id = full_metadata.get("magnet_id")
                    if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
                        try:
                            magnet_id = int(target.split(":")[-1])
                        except Exception:
                            magnet_id = None

                    if magnet_id is not None and get_search_provider is not None:
                        provider = get_search_provider("alldebrid", config)
                        if provider is not None:
                            try:
                                files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)})
                            except Exception:
                                files = []

                            # If the magnet isn't ready, provider.search returns a single not-ready folder row.
                            if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
                                detail = getattr(files[0], "detail", "")
                                log(
                                    f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})",
                                    file=sys.stderr,
                                )
                            else:
                                for sr in files:
                                    expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr)
                                continue

                expanded_items.append(item)
            except Exception:
                expanded_items.append(item)

        return expanded_items

    def _process_provider_items(
        self,
        *,
        piped_items: Sequence[Any],
        final_output_dir: Path,
        config: Dict[str, Any],
        quiet_mode: bool,
        registry: Dict[str, Any],
        progress: PipelineProgress,
    ) -> int:
        downloaded_count = 0
        get_search_provider = registry.get("get_search_provider")
        SearchResult = registry.get("SearchResult")

        expanded_items = self._expand_provider_items(piped_items=piped_items, registry=registry, config=config)

        for item in expanded_items:
            try:
                table = get_field(item, "table")
                title = get_field(item, "title")
                target = get_field(item, "path") or get_field(item, "url")
                media_kind = get_field(item, "media_kind")
                tags_val = get_field(item, "tag")
                tags_list: Optional[List[str]]
                if isinstance(tags_val, list):
                    tags_list = [str(t) for t in tags_val if t]
                else:
                    tags_list = None

                full_metadata = get_field(item, "full_metadata")
                if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
                    extra_md = item["extra"].get("full_metadata")
                    if isinstance(extra_md, dict):
                        full_metadata = extra_md

                # If this looks like a provider item and providers are available, prefer provider.download()
                downloaded_path: Optional[Path] = None
                attempted_provider_download = False
                provider_sr = None
                if table and get_search_provider and SearchResult:
                    provider = get_search_provider(str(table), config)
                    if provider is not None:
                        attempted_provider_download = True
                        sr = SearchResult(
                            table=str(table),
                            title=str(title or "Unknown"),
                            path=str(target or ""),
                            full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
                        )
                        debug(f"[download-file] Downloading provider item via {table}: {sr.title}")

                        # Preserve provider structure when possible (AllDebrid folders -> subfolders).
                        output_dir = final_output_dir
                        try:
                            if str(table).strip().lower() == "alldebrid":
                                from ProviderCore.download import sanitize_filename as _sf

                                md = full_metadata if isinstance(full_metadata, dict) else {}
                                magnet_name = None
                                if isinstance(md, dict):
                                    magnet_name = md.get("magnet_name") or md.get("folder")
                                if not magnet_name:
                                    magnet_name = str(get_field(item, "detail") or "").strip() or None

                                if magnet_name:
                                    output_dir = Path(output_dir) / _sf(str(magnet_name))

                                    relpath = None
                                    if isinstance(md, dict):
                                        relpath = md.get("relpath")
                                        if not relpath and isinstance(md.get("file"), dict):
                                            relpath = md["file"].get("_relpath")

                                    if relpath:
                                        parts = [p for p in str(relpath).replace("\\", "/").split("/") if p and p not in {".", ".."}]
                                        # relpath includes the filename; only join parent directories.
                                        for part in parts[:-1]:
                                            output_dir = Path(output_dir) / _sf(part)

                                    try:
                                        Path(output_dir).mkdir(parents=True, exist_ok=True)
                                    except Exception:
                                        output_dir = final_output_dir
                        except Exception:
                            output_dir = final_output_dir

                        downloaded_path = provider.download(sr, output_dir)
                        provider_sr = sr

                # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
                if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
                    availability = None
                    reason = None
                    if isinstance(full_metadata, dict):
                        availability = full_metadata.get("availability")
                        reason = full_metadata.get("availability_reason")
                    msg = "[download-file] OpenLibrary item not downloadable"
                    if availability or reason:
                        msg += f" (availability={availability or ''} reason={reason or ''})"
                    log(msg, file=sys.stderr)

                    # Fallback: run a LibGen title search so the user can pick an alternative source.
                    try:
                        title_text = str(title or "").strip()
                        if not title_text and isinstance(full_metadata, dict):
                            title_text = str(full_metadata.get("title") or "").strip()
                        if title_text:
                            log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
                            from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET

                            fallback_query = title_text
                            exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
                            if not callable(exec_fn):
                                log(
                                    "[download-file] search-provider cmdlet unavailable; cannot run LibGen fallback search",
                                    file=sys.stderr,
                                )
                                continue

                            ret = exec_fn(
                                None,
                                ["-provider", "libgen", "-query", fallback_query],
                                config,
                            )

                            # Promote the search-provider table to a display overlay so it renders.
                            try:
                                table_obj = pipeline_context.get_last_result_table()
                                items_obj = pipeline_context.get_last_result_items()
                                if table_obj is not None:
                                    pipeline_context.set_last_result_table_overlay(table_obj, items_obj)
                            except Exception:
                                pass

                            try:
                                return int(ret)  # type: ignore[arg-type]
                            except Exception:
                                return 1
                    except Exception:
                        pass

                    continue

                # Fallback: if we have a direct HTTP URL, download it directly
                if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
                    # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
                    # Never download these as "files".
                    if str(table or "").lower() == "libgen":
                        low = target.lower()
                        if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low):
                            log(
                                "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)",
                                file=sys.stderr,
                            )
                            continue
                    debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
                    suggested_name = str(title).strip() if title is not None else None
                    result_obj = _download_direct_file(
                        target,
                        final_output_dir,
                        quiet=quiet_mode,
                        suggested_filename=suggested_name,
                    )
                    downloaded_path = self._path_from_download_result(result_obj)

                if downloaded_path is None:
                    log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
                    continue

                # Allow providers to add/enrich tags and metadata during download.
                if str(table or "").lower() == "libgen" and provider_sr is not None:
                    try:
                        sr_tags = getattr(provider_sr, "tag", None)
                        if tags_list is None and isinstance(sr_tags, set) and sr_tags:
                            tags_list = sorted([str(t) for t in sr_tags if t])
                    except Exception:
                        pass

                    try:
                        sr_md = getattr(provider_sr, "full_metadata", None)
                        if isinstance(sr_md, dict) and sr_md:
                            full_metadata = sr_md
                    except Exception:
                        pass

                    try:
                        if isinstance(full_metadata, dict):
                            t = str(full_metadata.get("title") or "").strip()
                            if t:
                                title = t
                    except Exception:
                        pass

                self._emit_local_file(
                    downloaded_path=downloaded_path,
                    source=str(target) if target else None,
                    title_hint=str(title) if title else downloaded_path.stem,
                    tags_hint=tags_list,
                    media_kind_hint=str(media_kind) if media_kind else None,
                    full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
                    progress=progress,
                    config=config,
                )
                downloaded_count += 1

            except DownloadError as e:
                log(f"Download failed: {e}", file=sys.stderr)
            except Exception as e:
                log(f"Error downloading item: {e}", file=sys.stderr)

        return downloaded_count

    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Run download-file: explicit URLs first, then piped provider items.

        Returns 0 when at least one file was downloaded and 1 otherwise. The
        Internet Archive format picker and the explicit-URL step may each
        supply their own exit code, which is returned as-is. Any exception is
        logged and reported as 1, and the local progress UI is closed on every
        path.
        """
        progress = PipelineProgress(pipeline_context)
        try:
            debug("Starting download-file")

            options = parse_cmdlet_args(args, self)
            urls = self._normalize_urls(options)
            piped_items = self._collect_piped_items_if_no_urls(result, urls)

            if not (urls or piped_items):
                log("No url or piped items to download", file=sys.stderr)
                return 1

            quiet_mode = isinstance(config, dict) and bool(config.get("_quiet_background_output"))

            picker_code = self._maybe_show_internetarchive_formats(
                raw_urls=urls,
                piped_items=piped_items,
                parsed=options,
                config=config,
                quiet_mode=quiet_mode,
            )
            if picker_code is not None:
                return int(picker_code)

            # Where the files go; the resolver has already logged the reason on failure.
            final_output_dir = self._resolve_output_dir(options, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            # When the shared pipeline Live progress UI is not running (e.g. the cmdlet
            # is executed directly), bring up a minimal local panel so downloads still
            # get consistent, Rich-formatted progress (like download-media).
            total_items = self._safe_total_items(urls, piped_items)
            progress.ensure_local_ui(
                label="download-file",
                total_items=total_items,
                items_preview=self._build_preview(urls, piped_items, total_items),
            )

            registry = self._load_provider_registry()
            shared_kwargs = {
                "final_output_dir": final_output_dir,
                "config": config,
                "quiet_mode": quiet_mode,
                "registry": registry,
                "progress": progress,
            }

            urls_downloaded, early_exit = self._process_explicit_urls(raw_urls=urls, **shared_kwargs)
            downloaded_count = int(urls_downloaded)
            if early_exit is not None:
                return int(early_exit)

            downloaded_count += self._process_provider_items(piped_items=piped_items, **shared_kwargs)

            if downloaded_count <= 0:
                log("No downloads completed", file=sys.stderr)
                return 1

            debug(f"✓ Successfully processed {downloaded_count} file(s)")
            return 0

        except Exception as exc:
            log(f"Error in download-file: {exc}", file=sys.stderr)
            return 1

        finally:
            progress.close_local_ui(force_complete=True)

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config."""
        output_dir_arg = parsed.get("path") or parsed.get("output")
        if output_dir_arg:
            try:
                out_path = Path(str(output_dir_arg)).expanduser()
                out_path.mkdir(parents=True, exist_ok=True)
                return out_path
            except Exception as e:
                log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr)
                return None

        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
        if storage_location:
            try:
                return SharedArgs.resolve_storage(storage_location)
            except Exception as e:
                log(f"Invalid storage location: {e}", file=sys.stderr)
                return None

        # Priority 2: Config default output/temp directory
        try:
            from config import resolve_output_dir
            final_output_dir = resolve_output_dir(config)
        except Exception:
            final_output_dir = Path.home() / "Downloads"

        debug(f"Using default directory: {final_output_dir}")

        # Ensure directory exists
        try:
            final_output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
            return None

        return final_output_dir

    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute SHA256 hash of a file."""
        import hashlib
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()


# Module-level singleton registration: the cmdlet is instantiated once, at import
# time, and exposed under the name CMDLET.
# NOTE(review): presumably the cmdlet loader looks this attribute up by name
# (this file imports cmdlet.search_provider.CMDLET the same way) - confirm.
CMDLET = Download_File()