2026-01-05 07:51:19 -08:00
parent 8545367e28
commit 1f765cffda
32 changed files with 3447 additions and 3250 deletions

Provider/torrent.py Normal file

@@ -0,0 +1,442 @@
from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import requests

from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log

try:  # Preferred HTML parser
    from lxml import html as lxml_html
except Exception:  # pragma: no cover - optional
    lxml_html = None  # type: ignore

logger = logging.getLogger(__name__)

@dataclass
class TorrentInfo:
    """Normalized result row shared by every scraper."""

    name: str
    url: str
    seeders: int
    leechers: int
    size: str
    source: str
    category: Optional[str] = None
    uploader: Optional[str] = None
    magnet: Optional[str] = None


@dataclass
class SearchParams:
    """Query text and ordering options passed to Scraper.find()."""

    name: str
    category: Optional[str] = None
    order_column: Optional[str] = None
    order_ascending: bool = False


_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)

class Scraper:
    """Base class for site-specific torrent scrapers."""

    def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
        self.name = name
        self.base = base_url.rstrip("/")
        self.timeout = timeout
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
            )
        }
        self.params: Optional[SearchParams] = None

    def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
        """Fetch up to ``pages`` result pages and return every parsed torrent."""
        self.params = params
        results: List[TorrentInfo] = []
        for page in range(1, max(1, pages) + 1):
            try:
                results.extend(self._get_page(page))
            except Exception as exc:
                debug(f"[{self.name}] page fetch failed: {exc}")
        return results

    def _get_page(self, page: int) -> List[TorrentInfo]:
        url, payload = self._request_data(page)
        try:
            resp = requests.get(
                url,
                params=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            resp.raise_for_status()
            return self._parse_search(resp)
        except Exception as exc:
            debug(f"[{self.name}] request failed: {exc}")
            return []

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return the request URL and query parameters for ``page``."""
        return self.base, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:  # pragma: no cover - interface
        raise NotImplementedError

    def _parse_detail(self, url: str) -> Optional[str]:  # optional override
        """Fetch a torrent detail page and extract a magnet link, if any."""
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            resp.raise_for_status()
            return self._parse_detail_response(resp)
        except Exception:
            return None

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:  # pragma: no cover - interface
        return None

    @staticmethod
    def _int_from_text(value: Any) -> int:
        """Parse an integer out of scraped text such as '1,234'; fall back to 0."""
        try:
            return int(str(value).strip().replace(",", ""))
        except Exception:
            return 0

class NyaaScraper(Scraper):
    """HTML scraper for nyaa.si search result pages."""

    def __init__(self) -> None:
        super().__init__("nyaa.si", "https://nyaa.si")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        payload = {
            "p": page,
            "q": params.name,
            "c": params.category or "0_0",
            "f": "0",
        }
        if params.order_column:
            payload["s"] = params.order_column
            payload["o"] = "asc" if params.order_ascending else "desc"
        return f"{self.base}/", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 7:
                continue
            category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
            # The name cell may hold a comments link before the title link.
            name_links = name_cell.xpath("./a")
            name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
            if name_tag is None:
                continue
            name = name_tag.get("title") or (name_tag.text_content() or "").strip()
            url = name_tag.get("href") or ""
            magnet_link = None
            magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
            if magnet_candidates:
                magnet_link = magnet_candidates[0]
            category_title = None
            cat_titles = category_cell.xpath(".//a/@title")
            if cat_titles:
                category_title = cat_titles[0]
            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}{url}",
                    seeders=self._int_from_text(seed_cell.text_content()),
                    leechers=self._int_from_text(leech_cell.text_content()),
                    size=(size_cell.text_content() or "").strip(),
                    source=self.name,
                    category=category_title,
                    magnet=magnet_link,
                )
            )
        return results

class X1337Scraper(Scraper):
    """HTML scraper for 1337x.to; magnet links need a detail-page fetch."""

    def __init__(self) -> None:
        super().__init__("1337x.to", "https://1337x.to")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        order = None
        if params.order_column:
            direction = "asc" if params.order_ascending else "desc"
            order = f"{params.order_column}/{direction}"
        category = params.category
        name = requests.utils.quote(params.name)
        if order and category:
            path = f"/sort-category-search/{name}/{category}/{order}"
        elif category:
            path = f"/category-search/{name}/{category}"
        elif order:
            path = f"/sort-search/{name}/{order}"
        else:
            path = f"/search/{name}"
        url = f"{self.base}{path}/{page}/"
        return url, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 6:
                continue
            # Tolerate extra trailing cells instead of raising on unpack.
            name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell, *_ = cells
            links = name_cell.xpath(".//a")
            if len(links) < 2:
                continue
            torrent_path = links[1].get("href")
            torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
            info = TorrentInfo(
                name=(links[1].text_content() or "").strip(),
                url=torrent_url,
                seeders=self._int_from_text(seeds_cell.text_content()),
                leechers=self._int_from_text(leech_cell.text_content()),
                size=(size_cell.text_content() or "").strip().replace(",", ""),
                source=self.name,
                uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
            )
            # Search rows never expose magnets; fall back to the detail page.
            if not info.magnet:
                info.magnet = self._parse_detail(info.url)
            results.append(info)
        return results

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
        if lxml_html is None:
            return None
        doc = lxml_html.fromstring(response.text)
        links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
        return links[0] if links else None

class YTSScraper(Scraper):
    """JSON API client for yts.mx; magnets are built from torrent hashes."""

    TRACKERS = "&tr=".join(
        [
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        ]
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        # The JSON API does not need the browser User-Agent.
        self.headers = {}

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        payload = {
            "limit": 50,
            "page": page,
            "query_term": params.name,
            "sort_by": "seeds",
            "order_by": "asc" if params.order_ascending else "desc",
        }
        return f"{self.base}/list_movies.json", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        results: List[TorrentInfo] = []
        data = response.json()
        if data.get("status") != "ok":
            return results
        movies = (data.get("data") or {}).get("movies") or []
        for movie in movies:
            torrents = movie.get("torrents") or []
            if not torrents:
                continue
            # Keep only the best-seeded torrent for each movie.
            tor = max(torrents, key=lambda t: t.get("seeds", 0))
            name = movie.get("title") or "unknown"
            info = TorrentInfo(
                name=name,
                url=str(movie.get("id") or ""),
                seeders=int(tor.get("seeds", 0) or 0),
                leechers=int(tor.get("peers", 0) or 0),
                size=str(tor.get("size") or ""),
                source=self.name,
                category=(movie.get("genres") or [None])[0],
                magnet=self._build_magnet(tor, name),
            )
            results.append(info)
        return results

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        return (
            f"magnet:?xt=urn:btih:{torrent.get('hash')}"
            f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
        )

class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone)."""

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        _ = page  # single-page API
        params = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": params.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        results: List[TorrentInfo] = []
        try:
            data = response.json()
        except Exception:
            return results
        if not isinstance(data, list):
            return results
        for item in data:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            info_hash = str(item.get("info_hash") or "").strip()
            if not name or not info_hash:
                continue
            magnet = self._build_magnet(info_hash, name)
            seeders = self._int_from_text(item.get("seeders"))
            leechers = self._int_from_text(item.get("leechers"))
            size_raw = str(item.get("size") or "").strip()
            size_fmt = self._format_size(size_raw)
            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}/description.php?id={item.get('id')}",
                    seeders=seeders,
                    leechers=leechers,
                    size=size_fmt,
                    source=self.name,
                    category=str(item.get("category") or ""),
                    uploader=str(item.get("username") or ""),
                    magnet=magnet,
                )
            )
        return results

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a byte count as 'N.N GB' / 'N.N MB'; pass through non-numeric input."""
        try:
            size_int = int(size_raw)
            if size_int <= 0:
                return size_raw
            gb = size_int / (1024 ** 3)
            if gb >= 1:
                return f"{gb:.1f} GB"
            mb = size_int / (1024 ** 2)
            return f"{mb:.1f} MB"
        except Exception:
            return size_raw

class Torrent(Provider):
    """Provider that fans a query out across all available torrent scrapers."""

    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.scrapers: List[Scraper] = []
        # JSON APIs (no lxml dependency)
        self.scrapers.append(ApiBayScraper())
        self.scrapers.append(YTSScraper())
        # HTML scrapers require lxml
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        q = str(query or "").strip()
        if not q:
            return []
        params = SearchParams(name=q, order_column="seeders", order_ascending=False)
        results: List[TorrentInfo] = []
        for scraper in self.scrapers:
            try:
                scraped = scraper.find(params, pages=1)
                results.extend(scraped)
            except Exception as exc:
                debug(f"[torrent] scraper {scraper.name} failed: {exc}")
                continue
        # Merge, rank by seeders, and trim to the requested limit.
        results = sorted(results, key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            results = results[:limit]
        out: List[SearchResult] = []
        for item in results:
            # Prefer the magnet link as the result path; fall back to the page URL.
            path = item.magnet or item.url
            columns = [
                ("TITLE", item.name),
                ("Seeds", str(item.seeders)),
                ("Leechers", str(item.leechers)),
                ("Size", item.size or ""),
                ("Source", item.source),
            ]
            if item.uploader:
                columns.append(("Uploader", item.uploader))
            md = {
                "magnet": item.magnet,
                "url": item.url,
                "source": item.source,
                "seeders": item.seeders,
                "leechers": item.leechers,
                "size": item.size,
            }
            if item.uploader:
                md["uploader"] = item.uploader
            out.append(
                SearchResult(
                    table="torrent",
                    title=item.name,
                    path=path,
                    detail=f"Seeds:{item.seeders} | Size:{item.size}",
                    annotations=[item.source],
                    media_kind="other",
                    columns=columns,
                    full_metadata=md,
                    tag={"torrent"},
                )
            )
        return out
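
For a quick manual check, the provider can be driven directly; a minimal sketch is below. It assumes the Provider base class accepts being constructed with no config argument and that SearchResult exposes its constructor fields (title, path) as attributes, neither of which is confirmed by this diff.

    # Hypothetical smoke test for Provider/torrent.py; the no-argument constructor
    # and attribute access on SearchResult are assumptions, not verified API.
    from Provider.torrent import Torrent

    provider = Torrent()                    # config is optional per the signature above
    if provider.validate():                 # True when at least one scraper registered
        for hit in provider.search("ubuntu 24.04", limit=10):
            # path carries the magnet link when one was scraped, else the page URL
            print(hit.title, hit.path)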