"""Smart downloader front-door.

Currently focused on Internet Archive item pages:
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
- Selecting a row via @N expands to download-file <direct-url>

This enables:
  search-provider -provider internetarchive "..."
  @3                # shows formats table
  @2 | add-file ...  # downloads selected file then pipes to add-file
"""

from __future__ import annotations

import re
import sys
from typing import Any, Dict, List, Sequence, cast
from urllib.parse import quote

from SYS.logger import log, debug
import pipeline as pipeline_context
from result_table import ResultTable

from . import _shared as sh

# Local aliases for the names this module uses from the shared cmdlet helpers.
Cmdlet = sh.Cmdlet
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field


def _extract_ia_identifier(text: str) -> str:
    s = str(text or "").strip()
    if not s:
        return ""

    # https://archive.org/details/<identifier>
    m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
    if m:
        return str(m.group(1) or "").strip()

    # internetarchive:<identifier>
    if s.lower().startswith("internetarchive:"):
        return s.split(":", 1)[-1].strip()

    return ""


class Download_Data(Cmdlet):
    """List the downloadable files of a provider item as a selectable table.

    Only Internet Archive items are supported at the moment. Every row of the
    emitted table carries its direct download URL in ``_selection_args`` and
    the table's source command is ``download-file``.
    """

    def __init__(self) -> None:
        """Declare the cmdlet metadata and register the instance."""
        super().__init__(
            name="download-data",
            summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
            usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
            alias=[],
            arg=[SharedArgs.URL],
            detail=[
                "For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
                "Select a file row with @N to run download-file on that direct URL.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Resolve the target item and dispatch to the provider handler.

        Args:
            result: Piped input: a list of items, a single item, or None.
            args: Raw cmdlet arguments; only the ``url`` argument is read.
            config: Not used by this cmdlet.

        Returns:
            The Internet Archive handler's exit code, or 1 when the target is
            not an Internet Archive item.
        """
        try:
            # parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
            parsed = parse_cmdlet_args(args, cast(Any, self))
        except Exception as exc:
            # Best effort: a piped item can still supply the target.
            debug(f"[download-data] Failed to parse arguments: {exc}")
            parsed = {}

        raw_urls = parsed.get("url", [])
        if isinstance(raw_urls, str):
            raw_urls = [raw_urls]
        url_arg = str(raw_urls[0]).strip() if raw_urls else ""

        piped_items: List[Any] = []
        if isinstance(result, list):
            piped_items = list(result)
        elif result is not None:
            piped_items = [result]

        # Only the first piped item is considered.
        first_item = piped_items[0] if piped_items else None

        # Prefer piped item target if present.
        target = ""
        if piped_items:
            target = str(get_field(first_item, "path") or get_field(first_item, "url") or "").strip()
        if not target:
            target = url_arg

        table_name = ""
        if piped_items:
            try:
                table_name = str(get_field(first_item, "table") or "").strip().lower()
            except Exception:
                table_name = ""

        # The provider metadata is authoritative; fall back to parsing the target.
        identifier = ""
        if piped_items:
            md = get_field(first_item, "full_metadata")
            if isinstance(md, dict):
                identifier = str(md.get("identifier") or "").strip()
        if not identifier:
            identifier = _extract_ia_identifier(target)

        if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
            return self._run_internetarchive(first_item, identifier=identifier)

        log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
        return 1

    @staticmethod
    def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
        """Return True for bookkeeping entries (metadata, torrent, thumbnails)."""
        try:
            source = str(f.get("source") or "").strip().lower()
            fmt = str(f.get("format") or "").strip().lower()
        except Exception:
            source = ""
            fmt = ""

        if source == "metadata":
            return True
        if fmt in {"metadata", "archive bittorrent"}:
            return True
        return fmt.startswith("thumbnail")

    @staticmethod
    def _collect_ia_files(ia_item: Any) -> List[Dict[str, Any]]:
        """Return the item's file entries as plain dicts (possibly empty).

        The ``files`` attribute is used when it is a list containing dicts;
        otherwise ``get_files()`` is iterated and each entry is normalised to
        a dict with ``name``/``size``/``format``/``source`` keys.
        """
        files: List[Dict[str, Any]] = []
        try:
            raw_files = getattr(ia_item, "files", None)
            if isinstance(raw_files, list):
                files = [f for f in raw_files if isinstance(f, dict)]
        except Exception as exc:
            debug(f"[download-data] Failed to read item files: {exc}")
            files = []
        if files:
            return files

        def _read(entry: Any, key: str) -> Any:
            # Entries may be objects or dicts; read every field the same way
            # so dict entries keep their size/format/source.
            if isinstance(entry, dict):
                return entry.get(key)
            return getattr(entry, key, None)

        try:
            for entry in ia_item.get_files():
                name = _read(entry, "name")
                if not name:
                    continue
                files.append(
                    {
                        "name": str(name),
                        "size": _read(entry, "size"),
                        "format": _read(entry, "format"),
                        "source": _read(entry, "source"),
                    }
                )
        except Exception as exc:
            debug(f"[download-data] Failed to enumerate item files: {exc}")
            files = []
        return files

    @staticmethod
    def _run_internetarchive(item: Any, *, identifier: str) -> int:
        """Build and register the file table for one Internet Archive item.

        Args:
            item: The piped item (or None); used for the table title and as
                the subject of the registered table.
            identifier: Internet Archive item identifier.

        Returns:
            0 when at least one file row was produced, 1 on any failure
            (missing identifier, provider unavailable, lookup failure, or no
            files).
        """
        identifier = str(identifier or "").strip()
        if not identifier:
            # Without an identifier the lookup cannot succeed; fail early with
            # a message that names the actual problem.
            log("download-data: could not determine the Internet Archive identifier", file=sys.stderr)
            return 1

        try:
            from Provider.internetarchive import _ia as _ia_loader
        except Exception as exc:
            log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
            return 1

        try:
            ia = _ia_loader()
        except Exception as exc:
            log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
            return 1

        try:
            get_item = getattr(ia, "get_item", None)
            if not callable(get_item):
                raise Exception("internetarchive.get_item is not available")
            ia_item = cast(Any, get_item(identifier))
        except Exception as exc:
            log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
            return 1

        files = Download_Data._collect_ia_files(ia_item)
        if not files:
            log("download-data: Internet Archive item has no files", file=sys.stderr)
            return 1

        # Prefer non-metadata files for the picker.
        candidates = [f for f in files if not Download_Data._is_ia_metadata_file(f)]
        if not candidates:
            candidates = list(files)

        def _key(f: Dict[str, Any]) -> tuple[str, str]:
            # Group rows by format, then order by file name within a format.
            fmt = str(f.get("format") or "").strip().lower()
            name = str(f.get("name") or "").strip().lower()
            return (fmt, name)

        candidates.sort(key=_key)

        title = ""
        try:
            title = str(get_field(item, "title") or "").strip()
        except Exception:
            title = ""

        # Fall back to the identifier when the piped item has no title.
        table_title = f"Internet Archive: {title or identifier}".strip().rstrip(":")

        table = ResultTable(table_title).set_preserve_order(True)
        table.set_table("internetarchive.formats")
        # Selecting a row should expand to `download-file <direct-url>`.
        table.set_source_command("download-file", [])

        rows: List[Dict[str, Any]] = []
        for f in candidates:
            name = str(f.get("name") or "").strip()
            if not name:
                continue

            fmt = str(f.get("format") or "").strip()
            src = str(f.get("source") or "").strip()

            size_val: Any = f.get("size")
            try:
                size_val = int(size_val) if size_val not in (None, "") else ""
            except Exception:
                # Keep as-is; ResultTable will stringify.
                pass

            direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"

            row_item: Dict[str, Any] = {
                "table": "internetarchive",
                "title": fmt or name,
                "path": direct_url,
                "url": direct_url,
                "columns": [
                    ("Format", fmt),
                    ("Name", name),
                    ("Size", size_val),
                    ("Source", src),
                ],
                # Used by @N expansion: download-file <direct-url>
                "_selection_args": [direct_url],
                "full_metadata": {
                    "identifier": identifier,
                    "name": name,
                    "format": fmt,
                    "source": src,
                    "size": f.get("size"),
                },
            }

            rows.append(row_item)
            table.add_result(row_item)

        if not rows:
            log("download-data: no downloadable files found for this item", file=sys.stderr)
            return 1

        try:
            pipeline_context.set_last_result_table(table, rows, subject=item)
            pipeline_context.set_current_stage_table(table)
        except Exception as exc:
            debug(f"[download-data] Failed to register result table: {exc}")

        return 0


# Module-level cmdlet instance; constructing it calls register() in __init__.
CMDLET = Download_Data()