Provider/torrent.py (new file, 442 lines)
@@ -0,0 +1,442 @@
from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import requests

from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log

try:  # Preferred HTML parser
    from lxml import html as lxml_html
except Exception:  # pragma: no cover - optional
    lxml_html = None  # type: ignore

logger = logging.getLogger(__name__)


@dataclass
class TorrentInfo:
    """One search hit, normalized across all scrapers."""

    name: str
    url: str
    seeders: int
    leechers: int
    size: str
    source: str
    category: Optional[str] = None
    uploader: Optional[str] = None
    magnet: Optional[str] = None


@dataclass
class SearchParams:
    """Query options shared by every scraper."""

    name: str
    category: Optional[str] = None
    order_column: Optional[str] = None
    order_ascending: bool = False


# Matches strings that start with the magnet URI scheme. Currently unreferenced
# in this module; kept for callers that need to tell magnets from HTTP URLs.
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
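
# Illustrative: Torrent.search() below queries every scraper with
#   SearchParams(name="big buck bunny", order_column="seeders", order_ascending=False)
# so sources that support server-side ordering return best-seeded results first.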


class Scraper:
    """Base scraper: owns fetching, headers, timeouts and error handling;
    subclasses supply the request shape and the response parsing."""

    def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
        self.name = name
        self.base = base_url.rstrip("/")
        self.timeout = timeout
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
            )
        }
        self.params: Optional[SearchParams] = None

    def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
        self.params = params
        results: List[TorrentInfo] = []
        for page in range(1, max(1, pages) + 1):
            try:
                results.extend(self._get_page(page))
            except Exception as exc:
                debug(f"[{self.name}] page fetch failed: {exc}")
        return results

    def _get_page(self, page: int) -> List[TorrentInfo]:
        url, payload = self._request_data(page)
        try:
            resp = requests.get(
                url,
                params=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            resp.raise_for_status()
            return self._parse_search(resp)
        except Exception as exc:
            debug(f"[{self.name}] request failed: {exc}")
            return []

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        return self.base, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:  # pragma: no cover - interface
        raise NotImplementedError

    def _parse_detail(self, url: str) -> Optional[str]:  # optional override
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            resp.raise_for_status()
            return self._parse_detail_response(resp)
        except Exception:
            return None

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:  # pragma: no cover - interface
        return None

    @staticmethod
    def _int_from_text(value: Any) -> int:
        """Parse counts like "1,234"; fall back to 0 on unparseable input."""
        try:
            return int(str(value).strip().replace(",", ""))
        except Exception:
            return 0
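
# Adding a new backend, as a minimal sketch (ExampleScraper is hypothetical,
# not part of this module): a subclass only overrides _request_data and
# _parse_search; fetching and error handling stay in Scraper.
#
#     class ExampleScraper(Scraper):
#         def __init__(self) -> None:
#             super().__init__("example.org", "https://example.org")
#
#         def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
#             params = self.params or SearchParams(name="")
#             return f"{self.base}/search", {"q": params.name, "page": page}
#
#         def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
#             return []  # build TorrentInfo rows from response.text / response.json()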


class NyaaScraper(Scraper):
    def __init__(self) -> None:
        super().__init__("nyaa.si", "https://nyaa.si")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        payload = {
            "p": page,
            "q": params.name,
            "c": params.category or "0_0",
            "f": "0",
        }
        if params.order_column:
            payload["s"] = params.order_column
            payload["o"] = "asc" if params.order_ascending else "desc"
        return f"{self.base}/", payload
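
    # Example request this produces (illustrative): page 1 of a seed-sorted
    # query becomes
    #   GET https://nyaa.si/?p=1&q=big+buck+bunny&c=0_0&f=0&s=seeders&o=desc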

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 7:
                continue
            # Column order: category, name, links, size, date, seeders, leechers, ...
            category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells

            # Prefer the second <a> in the name cell; the first can be a comments link.
            name_links = name_cell.xpath("./a")
            name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
            if name_tag is None:
                continue

            name = name_tag.get("title") or (name_tag.text_content() or "").strip()
            url = name_tag.get("href") or ""

            magnet_link = None
            magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
            if magnet_candidates:
                magnet_link = magnet_candidates[0]

            category_title = None
            cat_titles = category_cell.xpath(".//a/@title")
            if cat_titles:
                category_title = cat_titles[0]

            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}{url}",
                    seeders=self._int_from_text(seed_cell.text_content()),
                    leechers=self._int_from_text(leech_cell.text_content()),
                    size=(size_cell.text_content() or "").strip(),
                    source=self.name,
                    category=category_title,
                    magnet=magnet_link,
                )
            )
        return results


class X1337Scraper(Scraper):
    def __init__(self) -> None:
        super().__init__("1337x.to", "https://1337x.to")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        order = None
        if params.order_column:
            direction = "asc" if params.order_ascending else "desc"
            order = f"{params.order_column}/{direction}"

        category = params.category
        name = requests.utils.quote(params.name)

        # 1337x encodes search options in the URL path rather than the query string.
        if order and category:
            path = f"/sort-category-search/{name}/{category}/{order}"
        elif category:
            path = f"/category-search/{name}/{category}"
        elif order:
            path = f"/sort-search/{name}/{order}"
        else:
            path = f"/search/{name}"

        url = f"{self.base}{path}/{page}/"
        return url, {}
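
    # Example URL this produces (illustrative): a seed-sorted query for page 1
    # becomes
    #   https://1337x.to/sort-search/big%20buck%20bunny/seeders/desc/1/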

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 6:
                continue
            # Column order: name, seeders, leechers, date, size, uploader.
            # The trailing *_ tolerates rows with extra cells, which would
            # otherwise make a strict unpack raise ValueError.
            name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell, *_ = cells

            links = name_cell.xpath(".//a")
            if len(links) < 2:
                continue

            torrent_path = links[1].get("href")
            torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""

            info = TorrentInfo(
                name=(links[1].text_content() or "").strip(),
                url=torrent_url,
                seeders=self._int_from_text(seeds_cell.text_content()),
                leechers=self._int_from_text(leech_cell.text_content()),
                size=(size_cell.text_content() or "").strip().replace(",", ""),
                source=self.name,
                uploader=(uploader_cell.text_content() or "").strip() or None,
            )

            # The listing page carries no magnet links; fetch the detail page for one.
            if not info.magnet:
                info.magnet = self._parse_detail(info.url)
            results.append(info)
        return results
def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
|
||||
if lxml_html is None:
|
||||
return None
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
|
||||
return links[0] if links else None
|
||||
|
||||
|
||||


class YTSScraper(Scraper):
    TRACKERS = "&tr=".join(
        [
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        ]
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        self.headers = {}  # JSON API; no browser User-Agent needed

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        payload = {
            "limit": 50,
            "page": page,
            "query_term": params.name,
            "sort_by": "seeds",
            "order_by": "asc" if params.order_ascending else "desc",
        }
        return f"{self.base}/list_movies.json", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        results: List[TorrentInfo] = []
        data = response.json()
        if data.get("status") != "ok":
            return results
        movies = (data.get("data") or {}).get("movies") or []
        for movie in movies:
            torrents = movie.get("torrents") or []
            if not torrents:
                continue
            # Keep only the best-seeded torrent per movie.
            tor = max(torrents, key=lambda t: t.get("seeds") or 0)
            name = movie.get("title") or "unknown"
            info = TorrentInfo(
                name=name,
                url=str(movie.get("id") or ""),
                seeders=int(tor.get("seeds", 0) or 0),
                leechers=int(tor.get("peers", 0) or 0),
                size=str(tor.get("size") or ""),
                source=self.name,
                category=(movie.get("genres") or [None])[0],
                magnet=self._build_magnet(tor, name),
            )
            results.append(info)
        return results

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        return (
            f"magnet:?xt=urn:btih:{torrent.get('hash')}"
            f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
        )
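
    # Example magnet this builds (illustrative, hash abbreviated):
    #   magnet:?xt=urn:btih:ABCD1234...&dn=Big%20Buck%20Bunny
    #   &tr=udp://open.demonii.com:1337/announce&tr=udp://tracker.opentrackr.org:1337/announce&tr=...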


class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone)."""

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        _ = page  # single-page API
        params = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": params.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        results: List[TorrentInfo] = []
        try:
            data = response.json()
        except Exception:
            return results
        if not isinstance(data, list):
            return results

        for item in data:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            info_hash = str(item.get("info_hash") or "").strip()
            if not name or not info_hash:
                continue

            magnet = self._build_magnet(info_hash, name)
            seeders = self._int_from_text(item.get("seeders"))
            leechers = self._int_from_text(item.get("leechers"))
            size_raw = str(item.get("size") or "").strip()
            size_fmt = self._format_size(size_raw)

            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}/description.php?id={item.get('id')}",
                    seeders=seeders,
                    leechers=leechers,
                    size=size_fmt,
                    source=self.name,
                    category=str(item.get("category") or ""),
                    uploader=str(item.get("username") or ""),
                    magnet=magnet,
                )
            )
        return results

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a byte count as GB/MB; return the input unchanged if not numeric."""
        try:
            size_int = int(size_raw)
            if size_int <= 0:
                return size_raw
            gb = size_int / (1024 ** 3)
            if gb >= 1:
                return f"{gb:.1f} GB"
            mb = size_int / (1024 ** 2)
            return f"{mb:.1f} MB"
        except Exception:
            return size_raw
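
    # Worked examples (illustrative): _format_size("1073741824") -> "1.0 GB",
    # _format_size("52428800") -> "50.0 MB"; non-numeric input comes back unchanged.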


class Torrent(Provider):
    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.scrapers: List[Scraper] = []
        # JSON APIs (no lxml dependency)
        self.scrapers.append(ApiBayScraper())
        self.scrapers.append(YTSScraper())
        # HTML scrapers require lxml
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        q = str(query or "").strip()
        if not q:
            return []

        params = SearchParams(name=q, order_column="seeders", order_ascending=False)
        results: List[TorrentInfo] = []

        for scraper in self.scrapers:
            try:
                scraped = scraper.find(params, pages=1)
                results.extend(scraped)
            except Exception as exc:
                debug(f"[torrent] scraper {scraper.name} failed: {exc}")
                continue

        # Merge all sources, best-seeded first, then trim to the caller's limit.
        results = sorted(results, key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            results = results[:limit]

        out: List[SearchResult] = []
        for item in results:
            # Prefer the magnet link; fall back to the detail-page URL.
            path = item.magnet or item.url
            columns = [
                ("TITLE", item.name),
                ("Seeds", str(item.seeders)),
                ("Leechers", str(item.leechers)),
                ("Size", item.size or ""),
                ("Source", item.source),
            ]
            if item.uploader:
                columns.append(("Uploader", item.uploader))

            md = {
                "magnet": item.magnet,
                "url": item.url,
                "source": item.source,
                "seeders": item.seeders,
                "leechers": item.leechers,
                "size": item.size,
            }
            if item.uploader:
                md["uploader"] = item.uploader

            out.append(
                SearchResult(
                    table="torrent",
                    title=item.name,
                    path=path,
                    detail=f"Seeds:{item.seeders} | Size:{item.size}",
                    annotations=[item.source],
                    media_kind="other",
                    columns=columns,
                    full_metadata=md,
                    tag={"torrent"},
                )
            )
        return out
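

# Minimal smoke test, as a sketch: run the module directly to query all
# configured scrapers. Assumes ProviderCore and SYS are importable, and that
# SearchResult exposes its constructor kwargs (title, detail) as attributes.
if __name__ == "__main__":  # pragma: no cover - illustrative only
    provider = Torrent()
    for hit in provider.search("big buck bunny", limit=5):
        print(f"{hit.title} ({hit.detail})")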