This commit is contained in:
2026-01-01 20:37:27 -08:00
parent f3c79609d8
commit deb05c0d44
35 changed files with 5030 additions and 4879 deletions

View File

@@ -1,11 +1,17 @@
from __future__ import annotations
from pathlib import Path
import hashlib
import sys
from typing import Any, Dict, Iterable, List, Optional
import time
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
from urllib.parse import urlparse
from API.HTTP import HTTPClient
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_magnet_link, is_torrent_file
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.download import _download_direct_file
from SYS.logger import log
@@ -53,7 +59,356 @@ def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]:
return None
def _consume_bencoded_value(data: bytes, pos: int) -> int:
if pos >= len(data):
raise ValueError("Unexpected end of bencode")
token = data[pos:pos + 1]
if token == b"i":
end = data.find(b"e", pos + 1)
if end == -1:
raise ValueError("Unterminated integer")
return end + 1
if token == b"l" or token == b"d":
cursor = pos + 1
while cursor < len(data):
if data[cursor:cursor + 1] == b"e":
return cursor + 1
cursor = _consume_bencoded_value(data, cursor)
raise ValueError("Unterminated list/dict")
if token and b"0" <= token <= b"9":
colon = data.find(b":", pos)
if colon == -1:
raise ValueError("Invalid string length")
length = int(data[pos:colon])
return colon + 1 + length
raise ValueError("Unknown bencode token")
def _top_level_info_span(data: bytes) -> Optional[tuple[int, int]]:
    """Return ``(start, end)`` of the value stored under the root dict's ``info`` key.

    Walks the key/value pairs of the outermost dictionary so that the bytes
    ``4:info`` appearing inside some other string value are never mistaken for
    the real key. Returns ``None`` when ``data`` is not a dictionary, a key is
    not a byte string, or no ``info`` key exists at the top level.

    Raises:
        ValueError: Propagated from ``_consume_bencoded_value`` on malformed input.
    """
    if data[:1] != b"d":
        return None
    size = len(data)
    cursor = 1
    while cursor < size and data[cursor:cursor + 1] != b"e":
        # Dictionary keys must be byte strings, i.e. start with a length digit.
        if not data[cursor:cursor + 1].isdigit():
            return None
        key_end = _consume_bencoded_value(data, cursor)
        colon = data.find(b":", cursor, key_end)
        value_end = _consume_bencoded_value(data, key_end)
        if value_end > size:
            return None
        if colon != -1 and data[colon + 1:key_end] == b"info":
            return key_end, value_end
        cursor = value_end
    return None


def _info_hash_from_torrent_bytes(data: bytes) -> Optional[str]:
    """Compute the SHA-1 hex digest of the bencoded ``info`` value in a torrent.

    The ``info`` value is located by walking the top-level dictionary. If that
    walk fails or finds nothing, the function falls back to scanning for the
    first ``4:info`` marker anywhere in the buffer.

    Args:
        data: Raw contents of a .torrent file.

    Returns:
        The 40-character hex digest, or ``None`` when no well-formed ``info``
        value can be located.
    """
    try:
        span = _top_level_info_span(data)
    except (ValueError, RecursionError):
        span = None

    if span is None:
        # Fallback scan for input whose outer structure could not be walked.
        needle = b"4:info"
        idx = data.find(needle)
        if idx == -1:
            return None
        start = idx + len(needle)
        try:
            end = _consume_bencoded_value(data, start)
        except (ValueError, RecursionError):
            return None
        # Never hash a span that claims to extend past the buffer.
        if end > len(data):
            return None
        span = (start, end)

    start, end = span
    try:
        return hashlib.sha1(data[start:end]).hexdigest()
    except Exception:
        # NOTE(review): presumably guards hashlib builds where sha1 is unavailable.
        return None
def _fetch_torrent_bytes(target: str) -> Optional[bytes]:
path_obj = Path(str(target))
try:
if path_obj.exists() and path_obj.is_file():
return path_obj.read_bytes()
except Exception:
pass
try:
parsed = urlparse(target)
except Exception:
parsed = None
if parsed is None or not parsed.scheme or parsed.scheme.lower() not in {"http", "https"}:
return None
if not target.lower().endswith(".torrent"):
return None
try:
with HTTPClient(timeout=30.0) as client:
response = client.get(target)
return response.content
except Exception as exc:
log(f"Failed to download .torrent from {target}: {exc}", file=sys.stderr)
return None
def resolve_magnet_spec(target: str) -> Optional[str]:
    """Turn a magnet link, hash, or .torrent reference into a magnet/hash string.

    Input that ``parse_magnet_or_hash`` recognises is returned in its parsed
    form. Otherwise, if ``is_torrent_file`` accepts the input, the torrent is
    fetched and its info-hash returned. Returns ``None`` for blank input or
    when neither route produces a value.
    """
    spec = str(target or "").strip()
    if spec == "":
        return None

    direct = parse_magnet_or_hash(spec)
    if direct:
        return direct

    if not is_torrent_file(spec):
        return None

    payload = _fetch_torrent_bytes(spec)
    if not payload:
        return None

    digest = _info_hash_from_torrent_bytes(payload)
    return digest if digest else None
def _dispatch_alldebrid_magnet_search(
    magnet_id: int,
    config: Dict[str, Any],
) -> None:
    """Run the ``search-file`` cmdlet for a freshly submitted magnet, then log it.

    The cmdlet is imported lazily and invoked with
    ``-provider alldebrid ID=<magnet_id>``. Any failure while importing or
    running it is ignored; the log line is written either way.
    """
    try:
        from cmdlet.search_file import CMDLET as search_cmdlet

        runner = getattr(search_cmdlet, "exec", None)
        if callable(runner):
            search_args = ["-provider", "alldebrid", f"ID={magnet_id}"]
            runner(None, search_args, config)
    except Exception:
        # Best effort only: the search is a convenience, not a requirement.
        pass

    log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr)
def prepare_magnet(
    magnet_spec: str,
    config: Dict[str, Any],
) -> tuple[Optional[AllDebridClient], Optional[int]]:
    """Submit ``magnet_spec`` to AllDebrid and return the client plus magnet id.

    Returns ``(None, None)`` when no API key is configured, the client cannot
    be constructed, or the submission fails or yields a non-positive id. On
    success the follow-up magnet search is dispatched before returning
    ``(client, magnet_id)``.
    """
    failure = (None, None)

    api_key = _get_debrid_api_key(config or {})
    if not api_key:
        # Offer the configuration panel when it is available; ignore failures.
        try:
            from ProviderCore.registry import show_provider_config_panel

            show_provider_config_panel("alldebrid", ["api_key"])
        except Exception:
            pass
        log("AllDebrid API key not configured (provider.alldebrid.api_key)", file=sys.stderr)
        return failure

    try:
        client = AllDebridClient(api_key)
    except Exception as exc:
        log(f"Failed to initialize AllDebrid client: {exc}", file=sys.stderr)
        return failure

    try:
        magnet_info = client.magnet_add(magnet_spec)
        magnet_id = int(magnet_info.get("id", 0))
        if not magnet_id > 0:
            log(f"AllDebrid magnet submission failed: {magnet_info}", file=sys.stderr)
            return failure
    except Exception as exc:
        log(f"Failed to submit magnet to AllDebrid: {exc}", file=sys.stderr)
        return failure

    _dispatch_alldebrid_magnet_search(magnet_id, config)
    return client, magnet_id
def _flatten_files_with_relpath(items: Any) -> Iterable[Dict[str, Any]]:
    """Yield a copy of each flattened file node with a ``relpath`` key filled in.

    ``relpath`` comes from the node's ``_relpath`` or ``relpath`` entry, and
    falls back to the stripped ``n``/``name`` value when both are empty.
    """
    for file_node in AllDebrid._flatten_files(items):
        copy = dict(file_node)
        relative = file_node.get("_relpath") or file_node.get("relpath")
        if not relative:
            fallback_name = file_node.get("n") or file_node.get("name")
            relative = str(fallback_name or "").strip()
        copy["relpath"] = relative
        yield copy
def _safe_relpath_parts(relpath: str) -> List[str]:
    """Split ``relpath`` into components that cannot climb out of a base directory.

    The root/drive anchor and every ``.``/``..`` component are dropped, so
    joining the result onto an output directory stays lexically inside it.
    """
    rel = Path(relpath)
    unsafe = {rel.anchor, ".", ".."}
    return [part for part in rel.parts if part not in unsafe]


def download_magnet(
    magnet_spec: str,
    original_url: str,
    final_output_dir: Path,
    config: Dict[str, Any],
    progress: Any,
    quiet_mode: bool,
    path_from_result: Callable[[Any], Path],
    on_emit: Callable[[Path, str, str, Dict[str, Any]], None],
) -> tuple[int, Optional[int]]:
    """Submit a magnet to AllDebrid, wait until it is ready, and download its files.

    Args:
        magnet_spec: Magnet/hash string passed to ``prepare_magnet``.
        original_url: Fallback URL reported to ``on_emit`` if a file has no link.
        final_output_dir: Base directory; files land below it following their
            relative path inside the magnet.
        config: Configuration dict; ``config["streaming"]["wait_timeout"]``
            (seconds, default 300) bounds the readiness polling.
        progress: Passed through to ``_download_direct_file`` as ``pipeline_progress``.
        quiet_mode: Passed through to ``_download_direct_file`` as ``quiet``.
        path_from_result: Converts a download result object into a path.
        on_emit: Called once per downloaded file with
            ``(path, url, relpath, metadata)``.

    Returns:
        ``(downloaded_count, magnet_id)``; ``magnet_id`` is ``None`` when the
        magnet could not be submitted at all.
    """
    client, magnet_id = prepare_magnet(magnet_spec, config)
    if client is None or magnet_id is None:
        return 0, None

    wait_timeout = 300
    try:
        streaming_config = config.get("streaming", {}) if isinstance(config, dict) else {}
        wait_timeout = int(streaming_config.get("wait_timeout", 300))
    except Exception:
        wait_timeout = 300

    # Poll in 5 second steps; the loop's ``else`` runs only when it times out.
    poll_interval = 5
    elapsed = 0
    while elapsed < wait_timeout:
        try:
            status = client.magnet_status(magnet_id)
        except Exception as exc:
            log(f"Failed to read magnet status {magnet_id}: {exc}", file=sys.stderr)
            return 0, magnet_id
        ready = bool(status.get("ready")) or status.get("statusCode") == 4
        if ready:
            break
        time.sleep(poll_interval)
        elapsed += poll_interval
    else:
        log(f"AllDebrid magnet {magnet_id} timed out after {wait_timeout}s", file=sys.stderr)
        return 0, magnet_id

    try:
        files_result = client.magnet_links([magnet_id])
    except Exception as exc:
        log(f"Failed to list AllDebrid magnet files: {exc}", file=sys.stderr)
        return 0, magnet_id

    magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
    file_nodes = magnet_files.get("files") if isinstance(magnet_files, dict) else []
    if not file_nodes:
        log(f"AllDebrid magnet {magnet_id} produced no files", file=sys.stderr)
        return 0, magnet_id

    downloaded = 0
    for node in _flatten_files_with_relpath(file_nodes):
        file_url = str(node.get("link") or "").strip()
        file_name = str(node.get("name") or "").strip()
        relpath = str(node.get("relpath") or file_name).strip()
        if not file_url or not relpath:
            continue

        # relpath comes from the remote service: strip anchors and dot segments
        # so a hostile value cannot place files outside final_output_dir.
        parts = _safe_relpath_parts(relpath)
        if not parts:
            log(f"Skipping AllDebrid file with unusable path {relpath!r}", file=sys.stderr)
            continue

        output_dir = final_output_dir.joinpath(*parts[:-1])
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            # Fall back to the flat base directory if the subfolder cannot be made.
            output_dir = final_output_dir

        try:
            result_obj = _download_direct_file(
                file_url,
                output_dir,
                quiet=quiet_mode,
                suggested_filename=parts[-1],
                pipeline_progress=progress,
            )
        except Exception as exc:
            log(f"Failed to download AllDebrid file {file_url}: {exc}", file=sys.stderr)
            continue

        downloaded_path = path_from_result(result_obj)
        metadata = {
            "magnet_id": magnet_id,
            "relpath": relpath,
            "name": file_name,
        }
        on_emit(downloaded_path, file_url or original_url, relpath, metadata)
        downloaded += 1

    return downloaded, magnet_id
def expand_folder_item(
    item: Any,
    get_search_provider: Optional[Callable[[str, Dict[str, Any]], Any]],
    config: Dict[str, Any],
) -> Tuple[List[Any], Optional[str]]:
    """Expand an AllDebrid folder result into the files of its magnet.

    Returns ``(files, None)`` with each file converted through ``to_dict`` when
    available. Returns ``([], None)`` when ``item`` is not an AllDebrid folder,
    no magnet id can be determined, or no provider is available. Returns
    ``([], detail)`` when the provider answers with a single folder
    placeholder; ``detail`` defaults to ``"unknown"``.
    """

    def _field(name: str) -> Any:
        # Items may be plain dicts or result objects.
        if isinstance(item, dict):
            return item.get(name)
        return getattr(item, name, None)

    table = _field("table")
    media_kind = _field("media_kind")
    full_metadata = _field("full_metadata")
    target = _field("path") or _field("url")

    is_alldebrid = str(table or "").lower() == "alldebrid"
    is_folder = str(media_kind or "").lower() == "folder"
    if not (is_alldebrid and is_folder):
        return [], None

    magnet_id = full_metadata.get("magnet_id") if isinstance(full_metadata, dict) else None
    if (
        magnet_id is None
        and isinstance(target, str)
        and target.lower().startswith("alldebrid:magnet:")
    ):
        # Fall back to the id embedded in an "alldebrid:magnet:<id>" path.
        try:
            magnet_id = int(target.rsplit(":", 1)[-1])
        except Exception:
            magnet_id = None

    if magnet_id is None or get_search_provider is None:
        return [], None

    provider = get_search_provider("alldebrid", config)
    if provider is None:
        return [], None

    try:
        query_filters = {"view": "files", "magnet_id": int(magnet_id)}
        files = provider.search("*", limit=10_000, filters=query_filters)
    except Exception:
        files = []

    # A lone folder entry stands in for the file list; report its detail text.
    if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
        placeholder_detail = getattr(files[0], "detail", "")
        return [], str(placeholder_detail or "unknown")

    return [entry.to_dict() if hasattr(entry, "to_dict") else entry for entry in files], None
def adjust_output_dir_for_alldebrid(
    base_output_dir: Path,
    full_metadata: Optional[Dict[str, Any]],
    item: Any,
) -> Path:
    """Derive and create the directory an AllDebrid file should be saved into.

    A sanitized magnet folder name is appended to ``base_output_dir`` unless
    the base already ends with it. The name comes from the metadata's
    ``magnet_name``/``folder``, or else from the item's ``detail``. The parent
    directories of the file's relative path are then appended, skipping
    ``.``/``..`` segments and a leading segment that repeats the magnet folder.
    If the directory cannot be created, ``base_output_dir`` is returned.
    """
    from ProviderCore.download import sanitize_filename as _clean

    meta = full_metadata if isinstance(full_metadata, dict) else {}

    folder_label = meta.get("magnet_name") or meta.get("folder")
    if not folder_label:
        try:
            if isinstance(item, dict):
                raw_detail = item.get("detail")
            else:
                raw_detail = getattr(item, "detail", None)
            folder_label = str(raw_detail or "").strip() or None
        except Exception:
            folder_label = None

    folder_dir = _clean(str(folder_label)) if folder_label else ""
    folder_key = folder_dir.lower() if folder_dir else ""

    try:
        tail = str(Path(base_output_dir).name or "")
    except Exception:
        tail = ""
    tail_key = _clean(tail).lower() if tail.strip() else ""

    destination = base_output_dir
    # Do not nest the magnet folder inside a directory already named after it.
    if folder_dir and tail_key != folder_key:
        destination = Path(destination) / folder_dir

    rel = meta.get("relpath") if isinstance(meta, dict) else None
    if not rel:
        file_meta = meta.get("file")
        if isinstance(file_meta, dict):
            rel = file_meta["_relpath"] if "_relpath" in file_meta else None

    if rel:
        normalized = str(rel).replace("\\", "/")
        segments = [seg for seg in normalized.split("/") if seg and seg not in {".", ".."}]
        if folder_dir and segments:
            try:
                if _clean(segments[0]).lower() == folder_key:
                    segments = segments[1:]
            except Exception:
                pass
        # Only the parent directories matter; the last segment is the file name.
        for seg in segments[:-1]:
            destination = Path(destination) / _clean(seg)

    try:
        Path(destination).mkdir(parents=True, exist_ok=True)
    except Exception:
        destination = base_output_dir
    return destination
class AllDebrid(Provider):
# Magnet URIs should be routed through this provider.
URL = ("magnet:",)
"""Search provider for AllDebrid account content.
This provider lists and searches the files/magnets already present in the
@@ -311,7 +666,10 @@ class AllDebrid(Provider):
],
full_metadata={
"magnet": magnet_status,
"magnet_id": magnet_id
"magnet_id": magnet_id,
"provider": "alldebrid",
"provider_view": "files",
"magnet_name": magnet_name,
},
)
]
@@ -382,6 +740,8 @@ class AllDebrid(Provider):
"magnet_name": magnet_name,
"relpath": relpath,
"file": file_node,
"provider": "alldebrid",
"provider_view": "files",
},
)
)
@@ -465,7 +825,10 @@ class AllDebrid(Provider):
],
full_metadata={
"magnet": magnet,
"magnet_id": magnet_id
"magnet_id": magnet_id,
"provider": "alldebrid",
"provider_view": "folders",
"magnet_name": magnet_name,
},
)
)
@@ -474,3 +837,128 @@ class AllDebrid(Provider):
break
return results
def selector(
    self,
    selected_items: List[Any],
    *,
    ctx: Any,
    stage_is_last: bool = True,
    **_kwargs: Any,
) -> bool:
    """Handle an AllDebrid `@N` selection by listing the chosen magnet's files.

    Returns ``False`` when this is not the last stage or when no selected item
    is a folder carrying a ``magnet_id``. Otherwise the first such folder is
    expanded into a files table, which is registered on ``ctx`` and printed,
    and ``True`` is returned, including when the lookup or display fails.
    """
    if not stage_is_last:
        return False

    field_names = ("title", "path", "table", "annotations", "media_kind", "full_metadata")

    def _to_mapping(entry: Any) -> Dict[str, Any]:
        # Normalise dicts, objects exposing to_dict(), and plain result objects.
        if isinstance(entry, dict):
            return dict(entry)
        try:
            if hasattr(entry, "to_dict"):
                converted = entry.to_dict()  # type: ignore[attr-defined]
                if isinstance(converted, dict):
                    return converted
        except Exception:
            pass
        try:
            return {name: getattr(entry, name, None) for name in field_names}
        except Exception:
            return {}

    folders: List[Dict[str, Any]] = []
    for entry in selected_items or []:
        data = _to_mapping(entry)

        meta = data.get("full_metadata") or data.get("metadata") or {}
        if not isinstance(meta, dict):
            meta = {}

        tags: set[str] = set()
        for source in (data.get("annotations"), meta.get("annotations")):
            if not isinstance(source, (list, tuple, set)):
                continue
            normalized = (str(tag or "").strip().lower() for tag in source)
            tags.update(text for text in normalized if text)

        kind = str(data.get("media_kind") or meta.get("media_kind") or "").strip().lower()
        looks_like_folder = kind == "folder" or "folder" in tags

        magnet_id = meta.get("magnet_id")
        if magnet_id is None or not looks_like_folder:
            continue

        label = str(data.get("title") or meta.get("magnet_name") or meta.get("name") or "").strip()
        folders.append({
            "magnet_id": magnet_id,
            "title": label if label else f"magnet-{magnet_id}",
        })

    if not folders:
        return False

    # Only the first matching folder is opened.
    first = folders[0]
    magnet_id = first.get("magnet_id")
    title = first.get("title") or f"magnet-{magnet_id}"

    try:
        files = self.search("*", limit=200, filters={"view": "files", "magnet_id": magnet_id})
    except Exception as exc:
        print(f"alldebrid selector failed: {exc}\n")
        return True

    try:
        from SYS.result_table import ResultTable
        from SYS.rich_display import stdout_console
    except Exception:
        return True

    table = ResultTable(f"AllDebrid Files: {title}").set_preserve_order(True)
    table.set_table("alldebrid")
    try:
        table.set_table_metadata({"provider": "alldebrid", "view": "files", "magnet_id": magnet_id})
    except Exception:
        pass
    table.set_source_command(
        "search-file",
        ["-provider", "alldebrid", "-open", str(magnet_id), "-query", "*"],
    )

    rows: List[Dict[str, Any]] = []
    for result in files or []:
        table.add_result(result)
        try:
            row = result.to_dict()
        except Exception:
            # Results without a usable to_dict() are reduced to core fields.
            row = {
                "table": getattr(result, "table", "alldebrid"),
                "title": getattr(result, "title", ""),
                "path": getattr(result, "path", ""),
                "full_metadata": getattr(result, "full_metadata", None),
            }
        rows.append(row)

    try:
        ctx.set_last_result_table(table, rows)
        ctx.set_current_stage_table(table)
    except Exception:
        pass

    try:
        console = stdout_console()
        console.print()
        stdout_console().print(table)
    except Exception:
        pass

    return True

View File

@@ -13,6 +13,156 @@ from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.logger import log
# Helper for download-file: render selectable formats for a details URL.
def maybe_show_formats_table(
    *,
    raw_urls: Any,
    piped_items: Any,
    parsed: Dict[str, Any],
    config: Dict[str, Any],
    quiet_mode: bool,
    get_field: Any,
) -> Optional[int]:
    """Render a selectable formats table for a single Internet Archive details URL.

    Returns ``None`` (not handled) in quiet mode, when there is not exactly one
    input, or when the input does not resolve to an identifier on a details
    page. Returns ``0`` once the table is registered, and ``1`` when the
    lookup fails, the table classes cannot be imported, or the item has no
    downloadable files.
    """
    if quiet_mode:
        return None

    def _text(value: Any) -> str:
        return str(value or "").strip()

    try:
        input_count = int(len(raw_urls or []) + len(piped_items or []))
    except Exception:
        input_count = 0
    if input_count != 1:
        return None

    item = piped_items[0] if piped_items else None

    # Prefer the piped item's path/url, then fall back to the raw URL argument.
    target = ""
    if item is not None:
        try:
            target = _text(get_field(item, "path") or get_field(item, "url"))
        except Exception:
            target = ""
    if not target and raw_urls:
        target = str(raw_urls[0]).strip()
    if not target:
        return None

    identifier = ""
    try:
        md = None if item is None else get_field(item, "full_metadata")
        if isinstance(md, dict):
            identifier = _text(md.get("identifier"))
    except Exception:
        identifier = ""
    if not identifier:
        try:
            identifier = _text(extract_identifier(target))
        except Exception:
            identifier = ""
    if not identifier:
        return None

    # Only show picker for item pages (details); direct download URLs should download immediately.
    try:
        on_details_page = bool(is_details_url(target))
    except Exception:
        return None
    if not on_details_page:
        return None

    try:
        files = list_download_files(identifier)
    except Exception as exc:
        log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
        return 1
    if not files:
        log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
        return 1

    title = ""
    try:
        if item is not None:
            title = _text(get_field(item, "title"))
    except Exception:
        title = ""
    if title:
        table_title = f"Internet Archive: {title}".strip().rstrip(":")
    else:
        table_title = f"Internet Archive: {identifier}"

    try:
        from SYS.result_table import ResultTable
        from SYS import pipeline as pipeline_context
    except Exception as exc:
        log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
        return 1

    # Carry the output location over to the command run after selection.
    base_args: List[str] = []
    out_arg = parsed.get("path") or parsed.get("output")
    if out_arg:
        base_args += ["-path", str(out_arg)]

    table = ResultTable(table_title).set_preserve_order(True)
    table.set_table("internetarchive.formats")
    table.set_source_command("download-file", base_args)

    rows: List[Dict[str, Any]] = []
    for entry in files:
        name = _text(entry.get("name"))
        if not name:
            continue
        fmt = _text(entry.get("format"))
        src = _text(entry.get("source"))
        direct_url = _text(entry.get("direct_url"))
        if not direct_url:
            continue

        # Show the size as an int when possible; otherwise keep the raw value.
        size_val: Any = entry.get("size")
        try:
            size_val = "" if size_val in (None, "") else int(size_val)
        except Exception:
            pass

        row_item: Dict[str, Any] = {
            "table": "internetarchive",
            "title": fmt or name,
            "path": direct_url,
            "url": direct_url,
            "columns": [
                ("Format", fmt),
                ("Name", name),
                ("Size", size_val),
                ("Source", src),
            ],
            "_selection_args": [direct_url],
            "full_metadata": {
                "identifier": identifier,
                "name": name,
                "format": fmt,
                "source": src,
                "size": entry.get("size"),
            },
        }
        rows.append(row_item)
        table.add_result(row_item)

    if not rows:
        log("download-file: no downloadable files found for this item", file=sys.stderr)
        return 1

    try:
        pipeline_context.set_last_result_table(table, rows, subject=item)
        pipeline_context.set_current_stage_table(table)
    except Exception:
        pass

    log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
    return 0
def _ia() -> Any:
try:
@@ -322,6 +472,7 @@ class InternetArchive(Provider):
collection="..." # optional (upload)
mediatype="..." # optional (upload)
"""
URL = ("archive.org",)
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)

View File

@@ -665,6 +665,7 @@ class Libgen(Provider):
"libgen.rs",
"libgen.st",
)
URL = URL_DOMAINS
"""Search provider for Library Genesis books."""
def search(

View File

@@ -15,6 +15,7 @@ class LOC(Provider):
"""
URL_DOMAINS = ["www.loc.gov"]
URL = URL_DOMAINS
def validate(self) -> bool:
return True

View File

@@ -229,6 +229,30 @@ def _archive_id_from_url(url: str) -> str:
"advancedsearch.php"}:
return first
def edition_id_from_url(u: str) -> str:
    """Return the path segment following ``/books/`` in a URL, or ``""``.

    For an OpenLibrary book URL this is the edition id (``OL...M``). The
    ``books`` segment is matched case-insensitively and must be the first
    path segment.
    """
    try:
        url_path = urlparse(str(u)).path or ""
        segments = [segment for segment in url_path.split("/") if segment]
    except Exception:
        segments = []

    if len(segments) < 2 or str(segments[0]).lower() != "books":
        return ""
    return str(segments[1]).strip()
def title_hint_from_url_slug(u: str) -> str:
    """Build a readable title hint from the last path segment of a URL.

    Underscores in the segment become spaces. When the URL has no path segment
    (or cannot be parsed) the placeholder ``"OpenLibrary"`` is returned.
    """
    try:
        url_path = urlparse(str(u)).path or ""
        segments = [segment for segment in url_path.split("/") if segment]
        slug = segments[-1] if segments else ""
    except Exception:
        slug = ""

    hint = (slug or "").strip().replace("_", " ")
    if hint:
        return hint
    return "OpenLibrary"
return ""
@@ -415,6 +439,7 @@ class OpenLibrary(Provider):
"openlibrary.org",
"archive.org",
)
URL = URL_DOMAINS
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
@@ -1419,6 +1444,64 @@ class OpenLibrary(Provider):
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# --- Convenience helpers for URL-driven downloads (used by download-file) ---
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Wrap a bare OpenLibrary URL in a minimal ``book`` SearchResult.

    The title is the hint derived from the URL slug. ``full_metadata`` holds
    ``openlibrary_id`` when an edition id is present in the URL, and is
    empty otherwise.
    """
    edition_id = edition_id_from_url(url)
    metadata = {"openlibrary_id": edition_id} if edition_id else {}
    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Download a book directly from an OpenLibrary URL.

    Builds a SearchResult for ``url`` and hands it to ``self.download``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` on success, or
        ``None`` when no search result could be built or the download
        produced nothing.
    """
    result = self.search_result_from_url(url)
    if result is None:
        return None

    local_file = self.download(result, output_dir, progress_callback)
    if local_file:
        return {"path": Path(local_file), "search_result": result}
    return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "direct download")
except Exception:
pass
out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(
pdf_url,
out_path,
session=self._session,
progress_callback=(
(
lambda downloaded, total, label:
progress_callback("bytes", downloaded, total, label)
) if progress_callback is not None else None
),
)
if ok:
return out_path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# 2) Borrow flow (credentials required).
try:
email, password = self._credential_archive(self.config or {})

View File

@@ -145,7 +145,9 @@ class Telegram(Provider):
[provider=telegram]
app_id=
api_hash=
bot_token=
"""
URL = ("t.me", "telegram.me")
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)