df

2026-01-04 02:23:50 -08:00
parent 3acf21a673
commit 8545367e28
6 changed files with 2925 additions and 94 deletions
--- a/Provider/alldebrid.py
+++ b/Provider/alldebrid.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import hashlib
+import json
 import sys
 import time
 from pathlib import Path
@@ -8,11 +9,145 @@ from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
 from urllib.parse import urlparse

 from API.HTTP import HTTPClient
-from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_magnet_link, is_torrent_file
+from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
 from ProviderCore.base import Provider, SearchResult
 from ProviderCore.download import sanitize_filename
 from SYS.download import _download_direct_file
 from SYS.logger import log
+from SYS.models import DownloadError
+
+_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60
+
+
+def _repo_root() -> Path:
+    """Return the parent of the directory that contains this module.
+
+    Falls back to the relative path ``"."`` when that location cannot be
+    resolved for any reason.
+    """
+    try:
+        module_file = Path(__file__).resolve()
+        root = module_file.parents[1]
+    except Exception:
+        root = Path(".")
+    return root
+
+
+def _hosts_cache_path() -> Path:
+    """Return the location of the on-disk AllDebrid domain cache.
+
+    The file is kept local to the repo so it works in portable installs, and
+    so the registry's URL routing can read it without instantiating providers.
+
+    The file is expected to be the JSON payload shape from AllDebrid:
+      {"status":"success","data":{"hosts":[...],"streams":[...],"redirectors":[...]}}
+    """
+    data_dir = _repo_root() / "API" / "data"
+    return data_dir / "alldebrid.json"
+
+
+def _normalize_cached_domain(value: Any) -> str:
+    """Return *value* as a bare lowercase hostname, or "" when it is unusable."""
+    if not isinstance(value, str):
+        # Non-string entries (numbers, nested objects, null) are not domains.
+        return ""
+    dom = value.strip().lower()
+    if dom.startswith(("http://", "https://")):
+        # Accidentally stored as a URL; normalize to hostname.
+        try:
+            dom = str(urlparse(dom).hostname or "").strip().lower()
+        except ValueError:
+            return ""
+    if dom.startswith("www."):
+        dom = dom[4:]
+    return dom
+
+
+def _iter_cached_domain_entries(raw: Any) -> Iterable[Any]:
+    """Yield candidate domain entries from a flat list or a mapping of host records."""
+    if isinstance(raw, list):
+        yield from raw
+    elif isinstance(raw, dict):
+        # Mapping shape: {"<host name>": {"domains": [...], ...}, ...}
+        for record in raw.values():
+            domains = record.get("domains") if isinstance(record, dict) else None
+            if isinstance(domains, list):
+                yield from domains
+
+
+def _load_cached_domains(category: str) -> List[str]:
+    """Load a de-duplicated domain list from API/data/alldebrid.json.
+
+    category: "hosts" | "streams" | "redirectors"
+
+    Returns lowercase hostnames without a leading "www.", in first-seen
+    order. Returns an empty list when the category is unknown or the cache
+    file is missing, unreadable, or not a JSON object.
+
+    Both a flat list of domains and a mapping of host records (each carrying
+    a "domains" list) are accepted for the category value.
+    NOTE(review): the mapping shape is assumed to be what the public
+    /v4/hosts endpoint returns -- confirm against the AllDebrid API docs.
+    """
+
+    wanted = str(category or "").strip().lower()
+    if wanted not in {"hosts", "streams", "redirectors"}:
+        return []
+
+    path = _hosts_cache_path()
+    try:
+        if not path.is_file():
+            return []
+        payload = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers both malformed JSON and undecodable bytes.
+        return []
+
+    if not isinstance(payload, dict):
+        return []
+
+    data = payload.get("data")
+    if not isinstance(data, dict):
+        # Back-compat for older cache shapes that stored the lists at top level.
+        data = payload
+
+    normalized = (
+        _normalize_cached_domain(entry)
+        for entry in _iter_cached_domain_entries(data.get(wanted))
+    )
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    return list(dict.fromkeys(dom for dom in normalized if dom))
+
+
+def _load_cached_hoster_domains() -> List[str]:
+    """Return the cached domains used for URL routing (download-file).
+
+    Only the "hosts" list is used on purpose: the "streams" list is extremely
+    broad and would steal URLs from other providers.
+    """
+    return _load_cached_domains(category="hosts")
+
+
+def _save_cached_hosts_payload(payload: Dict[str, Any]) -> None:
+    """Write *payload* as JSON to the hosts cache file (best-effort).
+
+    The payload is serialized first and written to a sibling temporary file,
+    which is then renamed over the cache. A failed or interrupted write
+    therefore leaves the previous cache untouched, and readers never see a
+    half-written file. Serialization and filesystem errors are swallowed on
+    purpose: the cache is an optimization, not a requirement.
+    """
+    path = _hosts_cache_path()
+    try:
+        text = json.dumps(payload, ensure_ascii=False, indent=2)
+    except (TypeError, ValueError, RecursionError):
+        return
+
+    # Unique sibling name so the final rename stays on the same filesystem.
+    tmp_path = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp_path.write_text(text, encoding="utf-8")
+        tmp_path.replace(path)
+    except (OSError, ValueError):
+        # ValueError covers text that cannot be encoded as UTF-8.
+        try:
+            tmp_path.unlink()
+        except OSError:
+            pass
+
+
+def _cache_is_fresh() -> bool:
+    """Report whether the cache file exists and is younger than the TTL.
+
+    Age is measured from the file's modification time. Any error while
+    inspecting the file counts as "not fresh".
+    """
+    cache_file = _hosts_cache_path()
+    try:
+        if not (cache_file.exists() and cache_file.is_file()):
+            return False
+        age_seconds = time.time() - float(cache_file.stat().st_mtime)
+    except Exception:
+        return False
+    return age_seconds < _HOSTS_CACHE_TTL_SECONDS
+
+
+def _fetch_hosts_payload_v4_hosts() -> Optional[Dict[str, Any]]:
+    """Fetch the public AllDebrid hosts payload.
+
+    This intentionally does NOT require an API key.
+    Endpoint: https://api.alldebrid.com/v4/hosts
+
+    Returns the decoded JSON body when it is a dict. Returns None when the
+    body has another type or when the request fails; failures are logged to
+    stderr.
+    """
+
+    endpoint = "https://api.alldebrid.com/v4/hosts"
+    try:
+        with HTTPClient(timeout=20.0) as http:
+            response = http.get(endpoint)
+            response.raise_for_status()
+            body = response.json()
+    except Exception as exc:
+        log(f"[alldebrid] Failed to fetch hosts list: {exc}", file=sys.stderr)
+        return None
+
+    if isinstance(body, dict):
+        return body
+    return None
+
+
+def refresh_alldebrid_hoster_cache(*, force: bool = False) -> None:
+    """Refresh the on-disk cache of host domains (best-effort).
+
+    force: when True, refetch even if the cache file is still within its TTL.
+
+    The fetched payload is saved only when it reports ``"status": "success"``
+    and carries a dict under ``"data"``. Anything else (for example an API
+    error body) is ignored, so it can neither overwrite a good cache nor
+    reset the cache's freshness timer.
+    """
+    if (not force) and _cache_is_fresh():
+        return
+
+    payload = _fetch_hosts_payload_v4_hosts()
+    if not isinstance(payload, dict) or not payload:
+        return
+
+    status = str(payload.get("status") or "").strip().lower()
+    if status != "success" or not isinstance(payload.get("data"), dict):
+        log(
+            f"[alldebrid] Ignoring hosts payload with status={status!r}; keeping existing cache",
+            file=sys.stderr,
+        )
+        return
+
+    _save_cached_hosts_payload(payload)


 def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]:
@@ -177,7 +312,7 @@ def prepare_magnet(
    api_key = _get_debrid_api_key(config or {})
    if not api_key:
        try:
-            from ProviderCore.registry import show_provider_config_panel
+            from SYS.rich_display import show_provider_config_panel

            show_provider_config_panel("alldebrid", ["api_key"])
        except Exception:
@@ -193,7 +328,8 @@ def prepare_magnet(

    try:
        magnet_info = client.magnet_add(magnet_spec)
-        magnet_id = int(magnet_info.get("id", 0))
+        magnet_id_val = magnet_info.get("id") or 0
+        magnet_id = int(magnet_id_val)
        if magnet_id <= 0:
            log(f"AllDebrid magnet submission failed: {magnet_info}", file=sys.stderr)
            return None, None
@@ -409,6 +545,26 @@ def adjust_output_dir_for_alldebrid(
 class AllDebrid(Provider):
    # Magnet URIs should be routed through this provider.
    URL = ("magnet:",)
+    URL_DOMAINS = ()
+
+    @classmethod
+    def url_patterns(cls) -> Tuple[str, ...]:
+        """Return the static URL patterns plus the cached hoster domains.
+
+        Starts from the parent class's patterns and appends every cached
+        "hosts" domain that is not already present, keeping first-seen order.
+        Loading the cache is best-effort: on any error the patterns gathered
+        so far are returned.
+        """
+        # Combine static patterns with cached host domains.
+        patterns = list(super().url_patterns())
+        try:
+            cached = _load_cached_hoster_domains()
+            # Set membership keeps de-duplication linear in the domain count.
+            known = set(patterns)
+            for d in cached:
+                dom = str(d or "").strip().lower()
+                if dom and dom not in known:
+                    known.add(dom)
+                    patterns.append(dom)
+            log(
+                f"[alldebrid] url_patterns loaded {len(cached)} cached host domains; total patterns={len(patterns)}",
+                file=sys.stderr,
+            )
+        except Exception:
+            # Deliberately silent: URL routing must keep working without the cache.
+            pass
+        return tuple(patterns)
+
    """Search provider for AllDebrid account content.

    This provider lists and searches the files/magnets already present in the
@@ -421,7 +577,15 @@ class AllDebrid(Provider):

    def validate(self) -> bool:
        # Consider "available" when configured; actual API connectivity can vary.
-        return bool(_get_debrid_api_key(self.config or {}))
+        ok = bool(_get_debrid_api_key(self.config or {}))
+        if ok:
+            # Best-effort: refresh cached host domains so future URL routing can
+            # route supported hosters through this provider.
+            try:
+                refresh_alldebrid_hoster_cache(force=False)
+            except Exception:
+                pass
+        return ok

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
        """Download an AllDebrid SearchResult into output_dir.
@@ -435,10 +599,12 @@ class AllDebrid(Provider):
        try:
            api_key = _get_debrid_api_key(self.config or {})
            if not api_key:
+                log("[alldebrid] download skipped: missing api_key", file=sys.stderr)
                return None

            target = str(getattr(result, "path", "") or "").strip()
            if not target.startswith(("http://", "https://")):
+                log(f"[alldebrid] download skipped: target not http(s): {target}", file=sys.stderr)
                return None

            try:
@@ -449,35 +615,59 @@ class AllDebrid(Provider):
                log(f"[alldebrid] Failed to init client: {exc}", file=sys.stderr)
                return None

-            # Quiet mode when download-file is mid-pipeline.
-            quiet = (
-                bool(self.config.get("_quiet_background_output"))
-                if isinstance(self.config,
-                              dict) else False
-            )
+            log(f"[alldebrid] download routing target={target}", file=sys.stderr)

-            unlocked_url = target
-            try:
-                unlocked = client.unlock_link(target)
-                if isinstance(unlocked,
-                              str) and unlocked.strip().startswith(("http://",
-                                                                    "https://")):
-                    unlocked_url = unlocked.strip()
-            except Exception as exc:
-                # Fall back to the raw link, but warn.
-                log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)
-
-            # Prefer provider title as the output filename.
-            suggested = sanitize_filename(
-                str(getattr(result,
-                            "title",
-                            "") or "").strip()
-            )
+            # Prefer provider title as the output filename; later we may override if unlocked URL has a better basename.
+            suggested = sanitize_filename(str(getattr(result, "title", "") or "").strip())
            suggested_name = suggested if suggested else None

-            try:
-                from SYS.download import _download_direct_file
+            # Quiet mode when download-file is mid-pipeline.
+            quiet = bool(self.config.get("_quiet_background_output")) if isinstance(self.config, dict) else False

+            def _html_guard(path: Path) -> bool:
+                try:
+                    if path.exists():
+                        size = path.stat().st_size
+                        if size > 0 and size <= 250_000 and path.suffix.lower() not in (".html", ".htm"):
+                            head = path.read_bytes()[:512]
+                            try:
+                                text = head.decode("utf-8", errors="ignore").lower()
+                            except Exception:
+                                text = ""
+                            if "<html" in text or "<!doctype html" in text:
+                                return True
+                except Exception:
+                    return False
+                return False
+
+            def _download_unlocked(unlocked_url: str, *, allow_html: bool = False) -> Optional[Path]:
+                # If this is an unlocked debrid link (allow_html=True), stream it directly and skip
+                # the generic HTML guard to avoid falling back to the public hoster.
+                if allow_html:
+                    try:
+                        from API.HTTP import HTTPClient
+
+                        fname = suggested_name or sanitize_filename(Path(urlparse(unlocked_url).path).name)
+                        if not fname:
+                            fname = "download"
+                        if not Path(fname).suffix:
+                            fname = f"{fname}.bin"
+                        dest = Path(output_dir) / fname
+                        dest.parent.mkdir(parents=True, exist_ok=True)
+                        with HTTPClient(timeout=30.0) as client:
+                            with client._request_stream("GET", unlocked_url, follow_redirects=True) as resp:
+                                resp.raise_for_status()
+                                with dest.open("wb") as fh:
+                                    for chunk in resp.iter_bytes():
+                                        if not chunk:
+                                            continue
+                                        fh.write(chunk)
+                        return dest if dest.exists() else None
+                    except Exception as exc2:
+                        log(f"[alldebrid] raw stream (unlocked) failed: {exc2}", file=sys.stderr)
+                        return None
+
+                # Otherwise, use standard downloader with guardrails.
                pipe_progress = None
                try:
                    if isinstance(self.config, dict):
@@ -485,47 +675,73 @@ class AllDebrid(Provider):
                except Exception:
                    pipe_progress = None

-                dl_res = _download_direct_file(
-                    unlocked_url,
-                    Path(output_dir),
-                    quiet=quiet,
-                    suggested_filename=suggested_name,
-                    pipeline_progress=pipe_progress,
-                )
-                downloaded_path = getattr(dl_res, "path", None)
-                if downloaded_path is None:
-                    return None
-                downloaded_path = Path(str(downloaded_path))
-
-                # Guard: if we got an HTML error/redirect page, treat as failure.
                try:
-                    if downloaded_path.exists():
-                        size = downloaded_path.stat().st_size
-                        if (size > 0 and size <= 250_000
-                                and downloaded_path.suffix.lower() not in (".html",
-                                                                           ".htm")):
-                            head = downloaded_path.read_bytes()[:512]
-                            try:
-                                text = head.decode("utf-8", errors="ignore").lower()
-                            except Exception:
-                                text = ""
-                            if "<html" in text or "<!doctype html" in text:
-                                try:
-                                    downloaded_path.unlink()
-                                except Exception:
-                                    pass
-                                log(
-                                    "[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
-                                    file=sys.stderr,
-                                )
-                                return None
+                    dl_res = _download_direct_file(
+                        unlocked_url,
+                        Path(output_dir),
+                        quiet=quiet,
+                        suggested_filename=suggested_name,
+                        pipeline_progress=pipe_progress,
+                    )
+                    downloaded_path = getattr(dl_res, "path", None)
+                    if downloaded_path is None:
+                        return None
+                    downloaded_path = Path(str(downloaded_path))
+                except DownloadError as exc:
+                    log(
+                        f"[alldebrid] _download_direct_file rejected URL ({exc}); no further fallback", file=sys.stderr
+                    )
+                    return None
+
+                try:
+                    if _html_guard(downloaded_path):
+                        log(
+                            "[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
+                            file=sys.stderr,
+                        )
+                        return None
                except Exception:
                    pass

                return downloaded_path if downloaded_path.exists() else None
+
+            unlocked_url = target
+            try:
+                unlocked = client.resolve_unlock_link(target, poll=True, max_wait_seconds=45, poll_interval_seconds=5)
+                if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
+                    unlocked_url = unlocked.strip()
+                log(f"[alldebrid] unlock -> {unlocked_url}", file=sys.stderr)
            except Exception as exc:
-                log(f"[alldebrid] Download failed: {exc}", file=sys.stderr)
-                return None
+                log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)
+
+            if unlocked_url != target:
+                # Prefer filename from unlocked URL path.
+                try:
+                    unlocked_name = sanitize_filename(Path(urlparse(unlocked_url).path).name)
+                    if unlocked_name:
+                        suggested_name = unlocked_name
+                except Exception:
+                    pass
+
+            # When using an unlocked URL different from the original hoster, stream it directly and do NOT fall back to the public URL.
+            allow_html = unlocked_url != target
+            log(
+                f"[alldebrid] downloading from {unlocked_url} (allow_html={allow_html})",
+                file=sys.stderr,
+            )
+            downloaded = _download_unlocked(unlocked_url, allow_html=allow_html)
+            if downloaded:
+                log(f"[alldebrid] downloaded -> {downloaded}", file=sys.stderr)
+                return downloaded
+
+            # If unlock failed entirely and we never changed URL, allow a single attempt on the original target.
+            if unlocked_url == target:
+                downloaded = _download_unlocked(target, allow_html=False)
+                if downloaded:
+                    log(f"[alldebrid] downloaded (original target) -> {downloaded}", file=sys.stderr)
+                    return downloaded
+
+            return None
        except Exception:
            return None

@@ -620,9 +836,12 @@ class AllDebrid(Provider):
            if magnet_id_val is None:
                magnet_id_val = kwargs.get("magnet_id")

+            if magnet_id_val is None:
+                return []
+
            try:
                magnet_id = int(magnet_id_val)
-            except Exception:
+            except (TypeError, ValueError):
                return []

            magnet_status: Dict[str,
@@ -769,9 +988,12 @@ class AllDebrid(Provider):
            if not isinstance(magnet, dict):
                continue

+            magnet_id_val = magnet.get("id")
+            if magnet_id_val is None:
+                continue
            try:
-                magnet_id = int(magnet.get("id"))
-            except Exception:
+                magnet_id = int(magnet_id_val)
+            except (TypeError, ValueError):
                continue

            magnet_name = str(