@@ -12,6 +12,7 @@ import sys
import time

from typing import Any, Dict, Optional, Set, List, Sequence, Tuple
import time
from urllib.parse import urlparse

from SYS.logger import log, debug
@@ -245,6 +246,73 @@ class AllDebridClient:
        except Exception as exc:
            raise AllDebridError(f"Failed to unlock link: {exc}")

    def _link_delayed(self, delayed_id: int) -> Dict[str, Any]:
        """Poll delayed link status."""

        try:
            resp = self._request("link/delayed", {"id": int(delayed_id)})
            if resp.get("status") != "success":
                raise AllDebridError("link/delayed returned error status")
            data = resp.get("data") or {}
            return data if isinstance(data, dict) else {}
        except AllDebridError:
            raise
        except Exception as exc:
            raise AllDebridError(f"Failed to poll delayed link: {exc}")

    def resolve_unlock_link(
        self,
        link: str,
        *,
        poll: bool = True,
        max_wait_seconds: int = 30,
        poll_interval_seconds: int = 5,
    ) -> Optional[str]:
        """Unlock a link and handle delayed links by polling link/delayed."""

        try:
            resp = self._request("link/unlock", {"link": link})
        except AllDebridError:
            raise
        except Exception as exc:
            raise AllDebridError(f"Failed to unlock link: {exc}")

        if resp.get("status") != "success":
            return None

        data = resp.get("data") or {}
        if not isinstance(data, dict):
            return None

        # Immediate link ready
        for key in ("link", "file"):
            val = data.get(key)
            if isinstance(val, str) and val.strip():
                return val.strip()

        delayed_id = data.get("delayed")
        if not poll or delayed_id is None:
            return None

        try:
            delayed_int = int(delayed_id)
        except Exception:
            return None

        deadline = time.time() + max_wait_seconds
        while time.time() < deadline:
            time.sleep(max(1, poll_interval_seconds))
            status_data = self._link_delayed(delayed_int)
            status = status_data.get("status")
            if status == 2:
                link_val = status_data.get("link")
                if isinstance(link_val, str) and link_val.strip():
                    return link_val.strip()
                return None
            if status == 3:
                raise AllDebridError("Delayed link generation failed")

        return None

    def check_host(self, hostname: str) -> Dict[str, Any]:
        """Check if a host is supported by AllDebrid.
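# Usage sketch (illustrative; not part of the commit). Assumes an
# AllDebridClient already constructed with a valid API key; the URL is a
# placeholder. resolve_unlock_link() first calls link/unlock, and when the
# API answers with a "delayed" id instead of a direct link, it polls
# link/delayed until status 2 (ready) or 3 (failed), or the deadline passes.
def _example_unlock(client: "AllDebridClient", url: str) -> str:
    try:
        direct = client.resolve_unlock_link(url, poll=True, max_wait_seconds=45)
    except AllDebridError:
        return url  # unlock failed outright; keep the original URL
    return direct or url  # None means "not ready / not unlockable"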
API/data/alldebrid.json (new file, 2371 lines): diff suppressed because it is too large.
@@ -1,6 +1,7 @@
from __future__ import annotations

import hashlib
import json
import sys
import time
from pathlib import Path
@@ -8,11 +9,145 @@ from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
from urllib.parse import urlparse

from API.HTTP import HTTPClient
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_magnet_link, is_torrent_file
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.download import _download_direct_file
from SYS.logger import log
from SYS.models import DownloadError

_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60


def _repo_root() -> Path:
    try:
        return Path(__file__).resolve().parents[1]
    except Exception:
        return Path(".")


def _hosts_cache_path() -> Path:
    # Keep this local to the repo so it works in portable installs.
    # The registry's URL routing can read this file without instantiating providers.
    #
    # This file is expected to be the JSON payload shape from AllDebrid:
    # {"status":"success","data":{"hosts":[...],"streams":[...],"redirectors":[...]}}
    return _repo_root() / "API" / "data" / "alldebrid.json"


def _load_cached_domains(category: str) -> List[str]:
    """Load cached domain list from API/data/alldebrid.json.

    category: "hosts" | "streams" | "redirectors"
    """

    wanted = str(category or "").strip().lower()
    if wanted not in {"hosts", "streams", "redirectors"}:
        return []

    path = _hosts_cache_path()
    try:
        if not path.exists() or not path.is_file():
            return []
        payload = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return []

    if not isinstance(payload, dict):
        return []

    data = payload.get("data")
    if not isinstance(data, dict):
        # Back-compat for older cache shapes.
        data = payload
    if not isinstance(data, dict):
        return []

    raw_list = data.get(wanted)
    if not isinstance(raw_list, list):
        return []

    out: List[str] = []
    seen: set[str] = set()
    for d in raw_list:
        try:
            dom = str(d or "").strip().lower()
        except Exception:
            continue
        if not dom:
            continue
        if dom.startswith("http://") or dom.startswith("https://"):
            # Accidentally stored as a URL; normalize to hostname.
            try:
                p = urlparse(dom)
                dom = str(p.hostname or "").strip().lower()
            except Exception:
                continue
        if dom.startswith("www."):
            dom = dom[4:]
        if not dom or dom in seen:
            continue
        seen.add(dom)
        out.append(dom)
    return out

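# Sketch (illustrative; not part of the commit) of the normalization that
# _load_cached_domains applies. Assuming the cache file holds the documented
# payload shape, e.g.:
#   {"status": "success", "data": {"hosts": ["https://www.Example.com/x", "example.com", "mega.nz"]}}
# the call below yields ["example.com", "mega.nz"]: URLs are reduced to their
# hostname, "www." is stripped, and everything is lowercased and deduped.
def _example_hosts() -> List[str]:
    return _load_cached_domains("hosts")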
def _load_cached_hoster_domains() -> List[str]:
    # For URL routing (download-file), we intentionally use only the "hosts" list.
    # The "streams" list is extremely broad and would steal URLs from other providers.
    return _load_cached_domains("hosts")


def _save_cached_hosts_payload(payload: Dict[str, Any]) -> None:
    path = _hosts_cache_path()
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
    except Exception:
        return
    try:
        path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    except Exception:
        return


def _cache_is_fresh() -> bool:
    path = _hosts_cache_path()
    try:
        if not path.exists() or not path.is_file():
            return False
        mtime = float(path.stat().st_mtime)
        return (time.time() - mtime) < _HOSTS_CACHE_TTL_SECONDS
    except Exception:
        return False


def _fetch_hosts_payload_v4_hosts() -> Optional[Dict[str, Any]]:
    """Fetch the public AllDebrid hosts payload.

    This intentionally does NOT require an API key.
    Endpoint referenced by user: https://api.alldebrid.com/v4/hosts
    """

    url = "https://api.alldebrid.com/v4/hosts"
    try:
        with HTTPClient(timeout=20.0) as client:
            resp = client.get(url)
            resp.raise_for_status()
            data = resp.json()
            return data if isinstance(data, dict) else None
    except Exception as exc:
        log(f"[alldebrid] Failed to fetch hosts list: {exc}", file=sys.stderr)
        return None


def refresh_alldebrid_hoster_cache(*, force: bool = False) -> None:
    """Refresh the on-disk cache of host domains (best-effort)."""
    if (not force) and _cache_is_fresh():
        return

    payload = _fetch_hosts_payload_v4_hosts()
    if isinstance(payload, dict) and payload:
        _save_cached_hosts_payload(payload)

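# Sketch (illustrative; not part of the commit): the intended call pattern.
# refresh_alldebrid_hoster_cache() is best-effort and swallows failures, and
# _cache_is_fresh() makes it a no-op while the cache file is younger than
# _HOSTS_CACHE_TTL_SECONDS (24h), so callers can invoke it opportunistically.
def _example_refresh_and_load() -> List[str]:
    refresh_alldebrid_hoster_cache(force=False)  # network fetch only if stale
    return _load_cached_hoster_domains()  # reads the on-disk cache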
def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]:
@@ -177,7 +312,7 @@ def prepare_magnet(
    api_key = _get_debrid_api_key(config or {})
    if not api_key:
        try:
            from ProviderCore.registry import show_provider_config_panel
            from SYS.rich_display import show_provider_config_panel

            show_provider_config_panel("alldebrid", ["api_key"])
        except Exception:
@@ -193,7 +328,8 @@ def prepare_magnet(

    try:
        magnet_info = client.magnet_add(magnet_spec)
        magnet_id = int(magnet_info.get("id", 0))
        magnet_id_val = magnet_info.get("id") or 0
        magnet_id = int(magnet_id_val)
        if magnet_id <= 0:
            log(f"AllDebrid magnet submission failed: {magnet_info}", file=sys.stderr)
            return None, None
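# Sketch (illustrative; not part of the commit): why the id parse was split
# into two steps. dict.get("id", 0) only substitutes the default for a
# *missing* key, so an API reply of {"id": None} made int(...) raise
# TypeError. Coercing falsy values to 0 first keeps the "<= 0 means failure"
# check intact:
def _example_magnet_id(magnet_info: Dict[str, Any]) -> int:
    magnet_id_val = magnet_info.get("id") or 0  # None/""/0 all become 0
    try:
        return int(magnet_id_val)
    except (TypeError, ValueError):
        return 0  # treated as a failed submission by the caller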
@@ -409,6 +545,26 @@ def adjust_output_dir_for_alldebrid(
class AllDebrid(Provider):
    # Magnet URIs should be routed through this provider.
    URL = ("magnet:",)
    URL_DOMAINS = ()

    @classmethod
    def url_patterns(cls) -> Tuple[str, ...]:
        # Combine static patterns with cached host domains.
        patterns = list(super().url_patterns())
        try:
            cached = _load_cached_hoster_domains()
            for d in cached:
                dom = str(d or "").strip().lower()
                if dom and dom not in patterns:
                    patterns.append(dom)
            log(
                f"[alldebrid] url_patterns loaded {len(cached)} cached host domains; total patterns={len(patterns)}",
                file=sys.stderr,
            )
        except Exception:
            pass
        return tuple(patterns)

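    # Sketch (illustrative; not part of the commit): the effective routing
    # set. url_patterns() combines the static class patterns with whatever
    # host domains the on-disk cache currently holds, so a refreshed cache
    # lets the registry route a supported hoster URL here without
    # instantiating the provider.
    # patterns = AllDebrid.url_patterns()
    # "magnet:" in patterns   -> True (static entry from URL)
    # "mega.nz" in patterns   -> True only if cached as a host (hypothetical domain)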
"""Search provider for AllDebrid account content.
|
||||
|
||||
This provider lists and searches the files/magnets already present in the
|
||||
@@ -421,7 +577,15 @@ class AllDebrid(Provider):

    def validate(self) -> bool:
        # Consider "available" when configured; actual API connectivity can vary.
        return bool(_get_debrid_api_key(self.config or {}))
        ok = bool(_get_debrid_api_key(self.config or {}))
        if ok:
            # Best-effort: refresh cached host domains so future URL routing can
            # route supported hosters through this provider.
            try:
                refresh_alldebrid_hoster_cache(force=False)
            except Exception:
                pass
        return ok

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
        """Download an AllDebrid SearchResult into output_dir.
@@ -435,10 +599,12 @@ class AllDebrid(Provider):
        try:
            api_key = _get_debrid_api_key(self.config or {})
            if not api_key:
                log("[alldebrid] download skipped: missing api_key", file=sys.stderr)
                return None

            target = str(getattr(result, "path", "") or "").strip()
            if not target.startswith(("http://", "https://")):
                log(f"[alldebrid] download skipped: target not http(s): {target}", file=sys.stderr)
                return None

            try:
@@ -449,35 +615,59 @@ class AllDebrid(Provider):
                log(f"[alldebrid] Failed to init client: {exc}", file=sys.stderr)
                return None

            # Quiet mode when download-file is mid-pipeline.
            quiet = (
                bool(self.config.get("_quiet_background_output"))
                if isinstance(self.config, dict) else False
            )
            log(f"[alldebrid] download routing target={target}", file=sys.stderr)

            unlocked_url = target
            try:
                unlocked = client.unlock_link(target)
                if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
                    unlocked_url = unlocked.strip()
            except Exception as exc:
                # Fall back to the raw link, but warn.
                log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)

            # Prefer provider title as the output filename.
            suggested = sanitize_filename(
                str(getattr(result, "title", "") or "").strip()
            )
            # Prefer provider title as the output filename; later we may override if unlocked URL has a better basename.
            suggested = sanitize_filename(str(getattr(result, "title", "") or "").strip())
            suggested_name = suggested if suggested else None

            try:
                from SYS.download import _download_direct_file
            # Quiet mode when download-file is mid-pipeline.
            quiet = bool(self.config.get("_quiet_background_output")) if isinstance(self.config, dict) else False

            def _html_guard(path: Path) -> bool:
                try:
                    if path.exists():
                        size = path.stat().st_size
                        if size > 0 and size <= 250_000 and path.suffix.lower() not in (".html", ".htm"):
                            head = path.read_bytes()[:512]
                            try:
                                text = head.decode("utf-8", errors="ignore").lower()
                            except Exception:
                                text = ""
                            if "<html" in text or "<!doctype html" in text:
                                return True
                except Exception:
                    return False
                return False

            def _download_unlocked(unlocked_url: str, *, allow_html: bool = False) -> Optional[Path]:
                # If this is an unlocked debrid link (allow_html=True), stream it directly and skip
                # the generic HTML guard to avoid falling back to the public hoster.
                if allow_html:
                    try:
                        from API.HTTP import HTTPClient

                        fname = suggested_name or sanitize_filename(Path(urlparse(unlocked_url).path).name)
                        if not fname:
                            fname = "download"
                        if not Path(fname).suffix:
                            fname = f"{fname}.bin"
                        dest = Path(output_dir) / fname
                        dest.parent.mkdir(parents=True, exist_ok=True)
                        with HTTPClient(timeout=30.0) as client:
                            with client._request_stream("GET", unlocked_url, follow_redirects=True) as resp:
                                resp.raise_for_status()
                                with dest.open("wb") as fh:
                                    for chunk in resp.iter_bytes():
                                        if not chunk:
                                            continue
                                        fh.write(chunk)
                        return dest if dest.exists() else None
                    except Exception as exc2:
                        log(f"[alldebrid] raw stream (unlocked) failed: {exc2}", file=sys.stderr)
                        return None

                # Otherwise, use standard downloader with guardrails.
                pipe_progress = None
                try:
                    if isinstance(self.config, dict):
@@ -485,47 +675,73 @@ class AllDebrid(Provider):
                except Exception:
                    pipe_progress = None

                dl_res = _download_direct_file(
                    unlocked_url,
                    Path(output_dir),
                    quiet=quiet,
                    suggested_filename=suggested_name,
                    pipeline_progress=pipe_progress,
                )
                downloaded_path = getattr(dl_res, "path", None)
                if downloaded_path is None:
                    return None
                downloaded_path = Path(str(downloaded_path))

                # Guard: if we got an HTML error/redirect page, treat as failure.
                try:
                    if downloaded_path.exists():
                        size = downloaded_path.stat().st_size
                        if (size > 0 and size <= 250_000
                                and downloaded_path.suffix.lower() not in (".html", ".htm")):
                            head = downloaded_path.read_bytes()[:512]
                            try:
                                text = head.decode("utf-8", errors="ignore").lower()
                            except Exception:
                                text = ""
                            if "<html" in text or "<!doctype html" in text:
                                try:
                                    downloaded_path.unlink()
                                except Exception:
                                    pass
                                log(
                                    "[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
                                    file=sys.stderr,
                                )
                                return None
                dl_res = _download_direct_file(
                    unlocked_url,
                    Path(output_dir),
                    quiet=quiet,
                    suggested_filename=suggested_name,
                    pipeline_progress=pipe_progress,
                )
                downloaded_path = getattr(dl_res, "path", None)
                if downloaded_path is None:
                    return None
                downloaded_path = Path(str(downloaded_path))
                except DownloadError as exc:
                    log(
                        f"[alldebrid] _download_direct_file rejected URL ({exc}); no further fallback", file=sys.stderr
                    )
                    return None

                try:
                    if _html_guard(downloaded_path):
                        log(
                            "[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
                            file=sys.stderr,
                        )
                        return None
                except Exception:
                    pass

                return downloaded_path if downloaded_path.exists() else None

            unlocked_url = target
            try:
                unlocked = client.resolve_unlock_link(target, poll=True, max_wait_seconds=45, poll_interval_seconds=5)
                if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
                    unlocked_url = unlocked.strip()
                    log(f"[alldebrid] unlock -> {unlocked_url}", file=sys.stderr)
            except Exception as exc:
                log(f"[alldebrid] Download failed: {exc}", file=sys.stderr)
                return None
                log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)

            if unlocked_url != target:
                # Prefer filename from unlocked URL path.
                try:
                    unlocked_name = sanitize_filename(Path(urlparse(unlocked_url).path).name)
                    if unlocked_name:
                        suggested_name = unlocked_name
                except Exception:
                    pass

            # When using an unlocked URL different from the original hoster, stream it directly and do NOT fall back to the public URL.
            allow_html = unlocked_url != target
            log(
                f"[alldebrid] downloading from {unlocked_url} (allow_html={allow_html})",
                file=sys.stderr,
            )
            downloaded = _download_unlocked(unlocked_url, allow_html=allow_html)
            if downloaded:
                log(f"[alldebrid] downloaded -> {downloaded}", file=sys.stderr)
                return downloaded

            # If unlock failed entirely and we never changed URL, allow a single attempt on the original target.
            if unlocked_url == target:
                downloaded = _download_unlocked(target, allow_html=False)
                if downloaded:
                    log(f"[alldebrid] downloaded (original target) -> {downloaded}", file=sys.stderr)
                    return downloaded

            return None
        except Exception:
            return None

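# Sketch (illustrative; not part of the commit): what the _html_guard helper
# above rejects, re-implemented standalone. A small (<= 250 KB) download
# whose first bytes look like an HTML document is treated as a hoster
# error/redirect page, unless the target itself is .html/.htm.
def _example_is_html_error_page(path: Path) -> bool:
    if not path.exists() or path.suffix.lower() in (".html", ".htm"):
        return False
    size = path.stat().st_size
    if size <= 0 or size > 250_000:
        return False
    text = path.read_bytes()[:512].decode("utf-8", errors="ignore").lower()
    return "<html" in text or "<!doctype html" in text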
@@ -620,9 +836,12 @@ class AllDebrid(Provider):
        if magnet_id_val is None:
            magnet_id_val = kwargs.get("magnet_id")

        if magnet_id_val is None:
            return []

        try:
            magnet_id = int(magnet_id_val)
        except Exception:
        except (TypeError, ValueError):
            return []

        magnet_status: Dict[str,
@@ -769,9 +988,12 @@ class AllDebrid(Provider):
            if not isinstance(magnet, dict):
                continue

            magnet_id_val = magnet.get("id")
            if magnet_id_val is None:
                continue
            try:
                magnet_id = int(magnet.get("id"))
            except Exception:
                magnet_id = int(magnet_id_val)
            except (TypeError, ValueError):
                continue

            magnet_name = str(

@@ -224,11 +224,19 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
    #
    # This keeps direct downloads and item pages routed to `internetarchive`, while
    # preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
    if host:
        if host == "openlibrary.org" or host.endswith(".openlibrary.org"):
    def _norm_host(h: str) -> str:
        h_norm = str(h or "").strip().lower()
        if h_norm.startswith("www."):
            h_norm = h_norm[4:]
        return h_norm

    host_norm = _norm_host(host)

    if host_norm:
        if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
            return "openlibrary" if "openlibrary" in _PROVIDERS else None

        if host == "archive.org" or host.endswith(".archive.org"):
        if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
            low_path = str(path or "").lower()
            is_borrowish = (
                low_path.startswith("/borrow/") or low_path.startswith("/stream/")
@@ -243,16 +251,20 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
        if not domains:
            continue
        for d in domains:
            dom = str(d or "").strip().lower()
            dom_raw = str(d or "").strip()
            dom = dom_raw.lower()
            if not dom:
                continue
            if raw_url_lower.startswith(dom):
                return name
        for d in domains:
            dom = str(d or "").strip().lower()
            if not dom or not host:
            # Scheme-like patterns (magnet:, http://example) still use prefix match.
            if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
                if raw_url_lower.startswith(dom):
                    return name
                continue
            if host == dom or host.endswith("." + dom):

            dom_norm = _norm_host(dom)
            if not dom_norm or not host_norm:
                continue
            if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
                return name

    return None

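# Sketch (illustrative; not part of the commit): the matching rule above in
# isolation. A normalized host matches a registered domain and its
# subdomains, but not lookalike hosts that merely end with the same text.
def _example_host_match(host: str, dom: str) -> bool:
    def norm(h: str) -> str:
        h = str(h or "").strip().lower()
        return h[4:] if h.startswith("www.") else h

    h, d = norm(host), norm(dom)
    return bool(h and d) and (h == d or h.endswith("." + d))

# _example_host_match("www.sub.example.com", "example.com") -> True
# _example_host_match("notexample.com", "example.com")      -> False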
@@ -2,9 +2,13 @@ from __future__ import annotations

import re
import sys
import tempfile
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from urllib.parse import quote

import httpx

from SYS.logger import debug, log
@@ -1099,6 +1103,94 @@ class HydrusNetwork(Store):
        debug(f"{self._log_prefix()} get_file: url={browser_url}")
        return browser_url

    def download_to_temp(
        self,
        file_hash: str,
        *,
        temp_root: Optional[Path] = None,
    ) -> Optional[Path]:
        """Download a Hydrus file to a temporary path for downstream uploads."""

        try:
            client = self._client
            if client is None:
                return None

            h = str(file_hash or "").strip().lower()
            if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h):
                return None

            created_tmp = False
            base_tmp = Path(temp_root) if temp_root is not None else Path(
                tempfile.mkdtemp(prefix="hydrus-file-")
            )
            if temp_root is None:
                created_tmp = True
            base_tmp.mkdir(parents=True, exist_ok=True)

            def _safe_filename(raw: str) -> str:
                cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip()
                if not cleaned:
                    return h
                cleaned = cleaned.strip(". ") or h
                return cleaned

            # Prefer ext/title from metadata when available.
            fname = h
            ext_val = ""
            try:
                meta = self.get_metadata(h) or {}
                if isinstance(meta, dict):
                    title_val = str(meta.get("title") or "").strip()
                    if title_val:
                        fname = _safe_filename(title_val)
                    ext_val = str(meta.get("ext") or "").strip().lstrip(".")
            except Exception:
                pass

            if not fname:
                fname = h
            if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"):
                fname = f"{fname}.{ext_val}"

            try:
                file_url = client.file_url(h)
            except Exception:
                file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"

            dest_path = base_tmp / fname
            with httpx.stream(
                "GET",
                file_url,
                headers={"Hydrus-Client-API-Access-Key": self.API},
                follow_redirects=True,
                timeout=60.0,
                verify=False,
            ) as resp:
                resp.raise_for_status()
                with dest_path.open("wb") as fh:
                    for chunk in resp.iter_bytes():
                        if chunk:
                            fh.write(chunk)

            if dest_path.exists():
                return dest_path

            if created_tmp:
                try:
                    shutil.rmtree(base_tmp, ignore_errors=True)
                except Exception:
                    pass
            return None
        except Exception as exc:
            log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr)
            try:
                if temp_root is None and "base_tmp" in locals():
                    shutil.rmtree(base_tmp, ignore_errors=True)  # type: ignore[arg-type]
            except Exception:
                pass
            return None

    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
        """Delete a file from Hydrus, then clear the deletion record.

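# Usage sketch (illustrative; not part of the commit). download_to_temp()
# requires a 64-character lowercase SHA-256 hex string. With temp_root=None
# it creates (and on failure removes) its own "hydrus-file-" directory, so a
# successful caller owns the cleanup of the returned file's parent.
def _example_fetch(store: "HydrusNetwork", sha256_hex: str) -> None:
    path = store.download_to_temp(sha256_hex)
    if path is None:
        return  # bad hash, missing client, or download failure
    try:
        pass  # hand the file to a downstream uploader here
    finally:
        shutil.rmtree(path.parent, ignore_errors=True)  # we own the temp dir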
@@ -5,6 +5,7 @@ from pathlib import Path
from copy import deepcopy
import sys
import shutil
import tempfile
import re

from SYS import models
@@ -501,7 +502,7 @@ class Add_File(Cmdlet):
        temp_dir_to_cleanup: Optional[Path] = None
        delete_after_item = delete_after
        try:
            media_path, file_hash = self._resolve_source(
            media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
                item, path_arg, pipe_obj, config
            )
            debug(
@@ -901,6 +902,38 @@ class Add_File(Cmdlet):
            except Exception:
                continue

    @staticmethod
    def _maybe_download_backend_file(
        backend: Any,
        file_hash: str,
        pipe_obj: models.PipeObject,
    ) -> Tuple[Optional[Path], Optional[Path]]:
        """Best-effort fetch of a backend file when get_file returns a URL.

        Returns (downloaded_path, temp_dir_to_cleanup).
        """

        downloader = getattr(backend, "download_to_temp", None)
        if not callable(downloader):
            return None, None

        tmp_dir: Optional[Path] = None
        try:
            tmp_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
            downloaded = downloader(str(file_hash), temp_root=tmp_dir)
            if isinstance(downloaded, Path) and downloaded.exists():
                pipe_obj.is_temp = True
                return downloaded, tmp_dir
        except Exception:
            pass

        if tmp_dir is not None:
            try:
                shutil.rmtree(tmp_dir, ignore_errors=True)
            except Exception:
                pass
        return None, None

    @staticmethod
    def _resolve_source(
        result: Any,
@@ -909,10 +942,11 @@ class Add_File(Cmdlet):
        config: Dict[str, Any],
    ) -> Tuple[Optional[Path], Optional[str]]:
    ) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
        """Resolve the source file path from args or pipeline result.

        Returns (media_path, file_hash).
        Returns (media_path, file_hash, temp_dir_to_cleanup).
        """
        # PRIORITY 1a: Try hash+path from directory scan result (has 'path' and 'hash' keys)
        if isinstance(result, dict):
@@ -931,7 +965,7 @@ class Add_File(Cmdlet):
                        f"[add-file] Using path+hash from directory scan: {media_path}"
                    )
                    pipe_obj.path = str(media_path)
                    return media_path, str(result_hash)
                    return media_path, str(result_hash), None
            except Exception as exc:
                debug(f"[add-file] Failed to use directory scan result: {exc}")

@@ -950,7 +984,17 @@ class Add_File(Cmdlet):
                media_path = backend.get_file(result_hash)
                if isinstance(media_path, Path) and media_path.exists():
                    pipe_obj.path = str(media_path)
                    return media_path, str(result_hash)
                    return media_path, str(result_hash), None

                if isinstance(media_path, str) and media_path.strip():
                    downloaded, tmp_dir = Add_File._maybe_download_backend_file(
                        backend,
                        str(result_hash),
                        pipe_obj,
                    )
                    if isinstance(downloaded, Path) and downloaded.exists():
                        pipe_obj.path = str(downloaded)
                        return downloaded, str(result_hash), tmp_dir
            except Exception as exc:
                debug(f"[add-file] Failed to retrieve via hash+store: {exc}")

@@ -959,7 +1003,7 @@ class Add_File(Cmdlet):
            media_path = Path(path_arg)
            pipe_obj.path = str(media_path)
            debug(f"[add-file] Using explicit path argument: {media_path}")
            return media_path, None
            return media_path, None, None

        # PRIORITY 3: Try from pipe_obj.path (check file first before URL)
        pipe_path = getattr(pipe_obj, "path", None)
@@ -976,8 +1020,8 @@ class Add_File(Cmdlet):
                    "add-file ingests local files only. Use download-file first.",
                    file=sys.stderr,
                )
                return None, None
            return Path(pipe_path_str), None
                return None, None, None
            return Path(pipe_path_str), None, None

        # Try from result (if it's a string path or URL)
        if isinstance(result, str):
@@ -993,10 +1037,10 @@ class Add_File(Cmdlet):
                    "add-file ingests local files only. Use download-file first.",
                    file=sys.stderr,
                )
                return None, None
                return None, None, None
            media_path = Path(result)
            pipe_obj.path = str(media_path)
            return media_path, None
            return media_path, None, None

        # Try from result if it's a list (pipeline emits multiple results)
        if isinstance(result, list) and result:
@@ -1014,10 +1058,10 @@ class Add_File(Cmdlet):
                        "add-file ingests local files only. Use download-file first.",
                        file=sys.stderr,
                    )
                    return None, None
                    return None, None, None
                media_path = Path(first_item)
                pipe_obj.path = str(media_path)
                return media_path, None
                return media_path, None, None

            # If the first item is a dict, interpret it as a PipeObject-style result
            if isinstance(first_item, dict):
@@ -1037,9 +1081,9 @@ class Add_File(Cmdlet):
                    try:
                        media_path = Path(path_candidate)
                        pipe_obj.path = str(media_path)
                        return media_path, first_item.get("hash")
                        return media_path, first_item.get("hash"), None
                    except Exception:
                        return None, first_item.get("hash")
                        return None, first_item.get("hash"), None

            # If first item is a PipeObject object
            try:
@@ -1052,7 +1096,7 @@ class Add_File(Cmdlet):
                    debug(f"Resolved path from PipeObject: {path_candidate}")
                    media_path = Path(path_candidate)
                    pipe_obj.path = str(media_path)
                    return media_path, getattr(first_item, "hash", None)
                    return media_path, getattr(first_item, "hash", None), None
            except Exception:
                pass

@@ -1060,7 +1104,7 @@ class Add_File(Cmdlet):
            f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}"
        )
        log("File path could not be resolved")
        return None, None
        return None, None, None

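    # Sketch (illustrative; not part of the commit): the widened contract.
    # Every exit of _resolve_source() now returns a 3-tuple
    # (media_path, file_hash, temp_dir_to_cleanup); the third slot is
    # non-None only when the file was materialized via
    # _maybe_download_backend_file, and the caller removes it after ingest.
    # Argument order follows the call site shown earlier:
    # media_path, file_hash, tmp_dir = Add_File._resolve_source(item, path_arg, pipe_obj, config)
    # try:
    #     ...  # ingest media_path
    # finally:
    #     if tmp_dir is not None:
    #         shutil.rmtree(tmp_dir, ignore_errors=True)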
    @staticmethod
    def _scan_directory_for_files(directory: Path) -> List[Dict[str, Any]]:
@@ -1778,6 +1822,12 @@ class Add_File(Cmdlet):
        store = Store(config)
        backend = store[backend_name]

        hydrus_like_backend = False
        try:
            hydrus_like_backend = str(type(backend).__name__ or "").lower().startswith("hydrus")
        except Exception:
            hydrus_like_backend = False

        # Prepare metadata from pipe_obj and sidecars
        tags, url, title, f_hash = Add_File._prepare_metadata(
            result, media_path, pipe_obj, config
@@ -1870,6 +1920,11 @@ class Add_File(Cmdlet):
            log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
            return 1

        upload_tags = tags
        if hydrus_like_backend and upload_tags:
            upload_tags = []
            debug("[add-file] Deferring tag application until after Hydrus upload")

        debug(
            f"[add-file] Storing into backend '{backend_name}' path='{media_path}' title='{title}'"
        )
@@ -1879,7 +1934,7 @@ class Add_File(Cmdlet):
            file_identifier = backend.add_file(
                media_path,
                title=title,
                tag=tags,
                tag=upload_tags,
                url=[] if (defer_url_association and url) else url,
            )
            debug(
@@ -1921,6 +1976,17 @@ class Add_File(Cmdlet):
            (f_hash or file_identifier or "unknown")
        )

        if hydrus_like_backend and tags:
            try:
                adder = getattr(backend, "add_tag", None)
                if callable(adder):
                    debug(
                        f"[add-file] Applying {len(tags)} tag(s) post-upload to Hydrus"
                    )
                    adder(resolved_hash, list(tags))
            except Exception as exc:
                log(f"[add-file] Hydrus post-upload tagging failed: {exc}", file=sys.stderr)

        # If we have url(s), ensure they get associated with the destination file.
        # This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
        if url:

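        # Sketch (illustrative; not part of the commit): the deferred-tagging
        # flow in one place. For Hydrus-like backends the commit empties
        # upload_tags before backend.add_file(), then applies the original
        # tags via backend.add_tag(resolved_hash, tags) once the upload has
        # resolved a hash, so tags attach to the stored file instead of being
        # dropped by upload paths that ignore them.
        # upload_tags = [] if hydrus_like_backend else tags
        # file_identifier = backend.add_file(media_path, title=title, tag=upload_tags, url=url)
        # if hydrus_like_backend and tags and callable(getattr(backend, "add_tag", None)):
        #     backend.add_tag(resolved_hash, list(tags))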