This commit is contained in:
2026-01-05 07:51:19 -08:00
parent 8545367e28
commit 1f765cffda
32 changed files with 3447 additions and 3250 deletions

View File

@@ -1,12 +1,15 @@
from __future__ import annotations
import os
import random
import re
import shutil
import string
import subprocess
import time
import sys
from pathlib import Path
import subprocess
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Iterable, List, Optional, Tuple
from API.hifi import HifiApiClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log
@@ -733,6 +736,10 @@ class HIFI(Provider):
input_ref: str,
output_path: Path,
lossless_fallback: bool = True,
progress: Optional[Any] = None,
transfer_label: Optional[str] = None,
duration_seconds: Optional[int] = None,
audio_quality: Optional[str] = None,
) -> Optional[Path]:
ffmpeg_path = self._find_ffmpeg()
if not ffmpeg_path:
@@ -749,20 +756,115 @@ class HIFI(Provider):
protocol_whitelist = "file,https,http,tcp,tls,crypto,data"
def _run(cmd: List[str]) -> bool:
label = str(transfer_label or output_path.name or "hifi")
def _estimate_total_bytes() -> Optional[int]:
try:
proc = subprocess.run(
cmd,
capture_output=True,
text=True,
check=False,
dur = int(duration_seconds) if duration_seconds is not None else None
except Exception:
dur = None
if not dur or dur <= 0:
return None
qual = str(audio_quality or "").strip().lower()
# Rough per-quality bitrate guess (bytes/sec).
if qual in {"hi_res",
"hi_res_lossless",
"hires",
"hi-res",
"master",
"mqa"}:
bps = 4_608_000 # ~24-bit/96k stereo
elif qual in {"lossless",
"flac"}:
bps = 1_411_200 # 16-bit/44.1k stereo
else:
bps = 320_000 # kbps for compressed
try:
return int((bps / 8.0) * dur)
except Exception:
return None
est_total_bytes = _estimate_total_bytes()
def _update_transfer(total_bytes_val: Optional[int]) -> None:
if progress is None:
return
try:
progress.update_transfer(
label=label,
completed=int(total_bytes_val) if total_bytes_val is not None else None,
total=est_total_bytes,
)
except Exception:
pass
def _run(cmd: List[str], *, target_path: Optional[Path] = None) -> bool:
cmd_progress = list(cmd)
# Enable ffmpeg progress output for live byte updates.
cmd_progress.insert(1, "-progress")
cmd_progress.insert(2, "pipe:1")
cmd_progress.insert(3, "-nostats")
try:
proc = subprocess.Popen(
cmd_progress,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if proc.returncode == 0 and self._has_nonempty_file(output_path):
return True
if proc.stderr:
debug(f"[hifi] ffmpeg failed: {proc.stderr.strip()}")
except Exception as exc:
debug(f"[hifi] ffmpeg invocation failed: {exc}")
return False
last_bytes = None
try:
while True:
line = proc.stdout.readline() if proc.stdout else ""
if not line:
if proc.poll() is not None:
break
time.sleep(0.05)
continue
if "=" not in line:
continue
key, val = line.strip().split("=", 1)
if key == "total_size":
try:
last_bytes = int(val)
_update_transfer(last_bytes)
except Exception:
pass
elif key == "out_time_ms":
# Map out_time_ms to byte estimate when total_size missing.
try:
if est_total_bytes and val.isdigit():
ms = int(val)
dur_ms = (duration_seconds or 0) * 1000
if dur_ms > 0:
pct = min(1.0, max(0.0, ms / dur_ms))
approx = int(est_total_bytes * pct)
_update_transfer(approx)
except Exception:
pass
proc.wait()
finally:
if last_bytes is not None:
_update_transfer(last_bytes)
check_path = target_path or output_path
if proc.returncode == 0 and self._has_nonempty_file(check_path):
return True
try:
stderr_text = proc.stderr.read() if proc.stderr else ""
if stderr_text:
debug(f"[hifi] ffmpeg failed: {stderr_text.strip()}")
except Exception:
pass
return False
# Prefer remux (fast, no transcode).
@@ -816,25 +918,14 @@ class HIFI(Provider):
"flac",
str(tmp_flac_path),
]
try:
proc = subprocess.run(
cmd_flac,
capture_output=True,
text=True,
check=False,
)
if proc.returncode == 0 and self._has_nonempty_file(tmp_flac_path):
if tmp_flac_path != flac_path:
try:
tmp_flac_path.replace(flac_path)
except Exception:
# If rename fails, still return the temp file.
return tmp_flac_path
return flac_path
if proc.stderr:
debug(f"[hifi] ffmpeg flac fallback failed: {proc.stderr.strip()}")
except Exception as exc:
debug(f"[hifi] ffmpeg flac fallback invocation failed: {exc}")
if _run(cmd_flac, target_path=tmp_flac_path) and self._has_nonempty_file(tmp_flac_path):
if tmp_flac_path != flac_path:
try:
tmp_flac_path.replace(flac_path)
except Exception:
# If rename fails, still return the temp file.
return tmp_flac_path
return flac_path
return None
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
@@ -921,7 +1012,14 @@ class HIFI(Provider):
# If resolve_tidal_manifest_path returned a URL, prefer feeding it directly to ffmpeg.
if resolved_text.lower().startswith("http"):
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=resolved_text,
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
if materialized is not None:
return materialized
@@ -947,7 +1045,14 @@ class HIFI(Provider):
if source_path.is_file() and source_path.suffix.lower() == ".mpd":
# Materialize audio from the local MPD.
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=str(source_path), output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=str(source_path),
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
if materialized is not None:
return materialized
return None
@@ -965,7 +1070,14 @@ class HIFI(Provider):
# As a last resort, attempt to treat the local path as an ffmpeg input.
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=resolved_text,
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
return materialized
def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]:
@@ -1228,6 +1340,38 @@ class HIFI(Provider):
minutes, secs = divmod(total, 60)
return f"{minutes}:{secs:02d}"
@staticmethod
def _coerce_duration_seconds(value: Any) -> Optional[int]:
candidates = []
candidates.append(value)
try:
if isinstance(value, dict):
for key in ("duration",
"durationSeconds",
"duration_sec",
"duration_ms",
"durationMillis"):
if key in value:
candidates.append(value.get(key))
except Exception:
pass
for cand in candidates:
try:
if cand is None:
continue
if isinstance(cand, str) and cand.strip().endswith("ms"):
cand = cand.strip()[:-2]
v = float(cand)
if v <= 0:
continue
if v > 10_000: # treat as milliseconds
v = v / 1000.0
return int(round(v))
except Exception:
continue
return None
@staticmethod
def _stringify(value: Any) -> str:
text = str(value or "").strip()
@@ -1305,23 +1449,18 @@ class HIFI(Provider):
if audio_quality:
columns.append(("Quality", audio_quality))
tags = {"tidal"}
if audio_quality:
tags.add(f"quality:{audio_quality.lower()}")
metadata = item.get("mediaMetadata")
if isinstance(metadata, dict):
tag_values = metadata.get("tags") or []
for tag in tag_values:
if isinstance(tag, str) and tag.strip():
tags.add(tag.strip().lower())
# IMPORTANT: do not retain a shared reference to the raw API dict.
# Downstream playback (MPV) mutates metadata to cache the decoded Tidal
# manifest path/URL. If multiple results share the same dict reference,
# they can incorrectly collapse to a single playable target.
full_md: Dict[str, Any] = dict(item)
url_value = self._stringify(full_md.get("url"))
if url_value:
full_md["url"] = url_value
return SearchResult(
tags = self._build_track_tags(full_md)
result = SearchResult(
table="hifi",
title=title,
path=path,
@@ -1332,6 +1471,12 @@ class HIFI(Provider):
columns=columns,
full_metadata=full_md,
)
if url_value:
try:
result.url = url_value
except Exception:
pass
return result
def _extract_track_selection_context(
self, selected_items: List[Any]
@@ -1401,6 +1546,9 @@ class HIFI(Provider):
def _fetch_track_details(self, track_id: int) -> Optional[Dict[str, Any]]:
if track_id <= 0:
return None
info_data = self._fetch_track_info(track_id)
for base in self.api_urls:
endpoint = f"{base.rstrip('/')}/track/"
try:
@@ -1408,12 +1556,32 @@ class HIFI(Provider):
payload = client.track(track_id) if client else None
data = payload.get("data") if isinstance(payload, dict) else None
if isinstance(data, dict):
return data
merged: Dict[str, Any] = {}
if isinstance(info_data, dict):
merged.update(info_data)
merged.update(data)
return merged
except Exception as exc:
log(f"[hifi] Track lookup failed for {endpoint}: {exc}", file=sys.stderr)
continue
return None
def _fetch_track_info(self, track_id: int) -> Optional[Dict[str, Any]]:
    """Query each configured API base for ``/info/`` data on *track_id*.

    Returns the first dict payload found under ``data``; None when every
    base fails or returns an unusable shape.
    """
    if track_id <= 0:
        return None
    for base in self.api_urls:
        endpoint = f"{base.rstrip('/')}/info/"
        try:
            client = self._get_api_client_for_base(base)
            payload = client.info(track_id) if client else None
        except Exception as exc:
            # Log and try the next configured base.
            debug(f"[hifi] Info lookup failed for {endpoint}: {exc}")
            continue
        if not isinstance(payload, dict):
            continue
        data = payload.get("data")
        if isinstance(data, dict):
            return data
    return None
def _fetch_track_lyrics(self, track_id: int) -> Optional[Dict[str, Any]]:
if track_id <= 0:
return None
@@ -1450,6 +1618,54 @@ class HIFI(Provider):
]
return [(name, value) for name, value in values if value]
def _build_track_tags(self, metadata: Dict[str, Any]) -> set[str]:
    """Derive the tag set for a track from its raw Tidal metadata dict.

    Always includes "tidal"; adds quality/title/artist/album/track tags
    when the corresponding fields are present and non-empty.
    """
    tags: set[str] = {"tidal"}
    quality = self._stringify(metadata.get("audioQuality"))
    if quality:
        tags.add(f"quality:{quality.lower()}")
    media_md = metadata.get("mediaMetadata")
    if isinstance(media_md, dict):
        for raw_tag in media_md.get("tags") or []:
            if isinstance(raw_tag, str) and raw_tag.strip():
                tags.add(raw_tag.strip().lower())
    title_text = self._stringify(metadata.get("title"))
    if title_text:
        tags.add(f"title:{title_text}")
    for artist in self._extract_artists(metadata):
        cleaned = self._stringify(artist)
        if cleaned:
            tags.add(f"artist:{cleaned}")
    # Album may be a nested object or a plain value.
    album_obj = metadata.get("album")
    if isinstance(album_obj, dict):
        album_name = self._stringify(album_obj.get("title"))
    else:
        album_name = self._stringify(metadata.get("album"))
    if album_name:
        tags.add(f"album:{album_name}")
    track_no = metadata.get("trackNumber") or metadata.get("track_number")
    if track_no is not None:
        try:
            number = int(track_no)
            if number > 0:
                tags.add(f"track:{number}")
        except Exception:
            # Non-numeric track markers are kept as-is.
            text = self._stringify(track_no)
            if text:
                tags.add(f"track:{text}")
    return tags
def selector(
self,
selected_items: List[Any],
@@ -1476,16 +1692,32 @@ class HIFI(Provider):
else None
)
try:
debug(
f"[hifi.selector] table_type={table_type} stage_is_last={stage_is_last} selected_count={len(selected_items) if selected_items else 0}"
)
except Exception:
pass
# Artist selection: selecting @N should open an albums list.
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.artist":
contexts = self._extract_artist_selection_context(selected_items)
try:
debug(f"[hifi.selector] artist contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
artist_id, artist_name = contexts[0]
album_results = self._albums_for_artist(artist_id=artist_id, artist_name=artist_name, limit=200)
if not album_results:
return False
try:
from SYS.rich_display import stdout_console
stdout_console().print(f"[bold yellow][hifi] No albums found for {artist_name}[/]")
except Exception:
log(f"[hifi] No albums found for {artist_name}")
return True
try:
from SYS.rich_display import stdout_console
@@ -1531,6 +1763,10 @@ class HIFI(Provider):
# Album selection: selecting @N should open the track list for that album.
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.album":
contexts = self._extract_album_selection_context(selected_items)
try:
debug(f"[hifi.selector] album contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
@@ -1605,6 +1841,10 @@ class HIFI(Provider):
return False
contexts = self._extract_track_selection_context(selected_items)
try:
debug(f"[hifi.selector] track contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
@@ -1657,6 +1897,9 @@ class HIFI(Provider):
insert_pos = 2 if artist_display else 1
columns.insert(insert_pos, ("Album", album_title))
tags = self._build_track_tags(detail)
url_value = self._stringify(detail.get("url"))
result = SearchResult(
table="hifi",
title=title,
@@ -1666,7 +1909,13 @@ class HIFI(Provider):
media_kind="audio",
columns=columns,
full_metadata=detail,
tag=tags,
)
if url_value:
try:
result.url = url_value
except Exception:
pass
table.add_result(result)
try:
results_payload.append(result.to_dict())

View File

@@ -8,12 +8,11 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
from urllib.parse import urlparse
from API.HTTP import HTTPClient
from API.HTTP import HTTPClient, _download_direct_file
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.download import _download_direct_file
from SYS.logger import log
from SYS.logger import log, debug
from SYS.models import DownloadError
_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60
@@ -302,7 +301,7 @@ def _dispatch_alldebrid_magnet_search(
)
except Exception:
pass
log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr)
debug(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download")
def prepare_magnet(

192
Provider/hello_provider.py Normal file
View File

@@ -0,0 +1,192 @@
"""Example provider template for use as a starter kit.
This minimal provider demonstrates the typical hooks a provider may implement:
- `validate()` to assert it's usable
- `search()` to return `SearchResult` items
- `download()` to persist a sample file (useful for local tests)
See `docs/provider_guide.md` for authoring guidance.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
class HelloProvider(Provider):
    """Very small example provider suitable as a template.

    - Table name: `hello`
    - Usage: `search-file -provider hello "query"`
    - Selecting a row and piping into `download-file` will call `download()`.
    """

    # URL scheme prefixes this provider claims; no real domains.
    URL = ("hello:",)
    URL_DOMAINS = ()

    def validate(self) -> bool:
        # No configuration required; always available for testing/demo purposes.
        return True

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Return up to three synthetic `SearchResult` rows for *query*.

        Wildcard-ish queries ("*", "all", "list", empty) fall back to a
        fixed "example" query so the provider always yields rows.
        """
        q = (query or "").strip()
        results: List[SearchResult] = []
        if not q or q in {"*", "all", "list"}:
            q = "example"
        # Emit up to `limit` tiny example results.
        n = min(max(1, int(limit)), 3)
        for i in range(1, n + 1):
            title = f"{q} sample {i}"
            path = f"https://example.org/{q}/{i}"
            sr = SearchResult(
                table="hello",
                title=title,
                path=path,
                detail="Example provider result",
                media_kind="file",
                columns=[("Example", "yes")],
                full_metadata={"example_index": i},
            )
            results.append(sr)
        # `limit=0` slices the list empty even though one row was built.
        return results[: max(0, int(limit))]

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
        """Create a small text file to simulate a download.

        This keeps the example self-contained (no network access required) and
        makes it straightforward to test provider behavior with `pytest`.
        """
        try:
            Path(output_dir).mkdir(parents=True, exist_ok=True)
        except Exception:
            pass
        title = str(getattr(result, "title", "hello") or "hello").strip()
        # Replace anything that is not alnum/-/_/. so the name is filesystem-safe.
        safe = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in title)
        fname = f"{safe}.txt" if safe else "hello.txt"
        dest = Path(output_dir) / fname
        try:
            dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
            return dest
        except Exception:
            # Write failure is reported as "no file produced".
            return None

    def selector(
        self,
        selected_items: List[Any],
        *,
        ctx: Any,
        stage_is_last: bool = True,
        **_kwargs: Any,
    ) -> bool:
        """Present a simple details table when a HelloProvider row is selected.

        This demonstrates how providers can implement custom `@N` selection
        behavior by constructing a `ResultTable`, populating it with
        provider-specific rows, and instructing the CLI to show the table.
        Returns True when the selection was handled, False to let the
        default pipeline behavior proceed.
        """
        if not stage_is_last:
            return False

        def _as_payload(item: Any) -> Dict[str, Any]:
            # Normalize a selected item (dict, SearchResult-like, or other
            # object) into a plain dict for uniform access below.
            if isinstance(item, dict):
                return dict(item)
            try:
                if hasattr(item, "to_dict"):
                    maybe = item.to_dict()
                    if isinstance(maybe, dict):
                        return maybe
            except Exception:
                pass
            payload: Dict[str, Any] = {}
            try:
                payload = {
                    "title": getattr(item, "title", None),
                    "path": getattr(item, "path", None),
                    "table": getattr(item, "table", None),
                    "annotations": getattr(item, "annotations", None),
                    "media_kind": getattr(item, "media_kind", None),
                    "full_metadata": getattr(item, "full_metadata", None),
                }
            except Exception:
                payload = {}
            return payload

        # Keep only items that carry the example_index marker this provider set.
        chosen: List[Dict[str, Any]] = []
        for item in selected_items or []:
            payload = _as_payload(item)
            meta = payload.get("full_metadata") or {}
            if not isinstance(meta, dict):
                meta = {}
            idx = meta.get("example_index")
            if idx is None:
                continue
            title = str(payload.get("title") or payload.get("path") or "").strip() or f"hello-{idx}"
            chosen.append({"index": idx, "title": title, "path": payload.get("path")})
        if not chosen:
            return False
        # Only the first recognized selection is expanded.
        target = chosen[0]
        idx = target.get("index")
        title = target.get("title") or f"hello-{idx}"
        try:
            from SYS.result_table import ResultTable
            from SYS.rich_display import stdout_console
        except Exception:
            # If ResultTable isn't available, consider selection handled
            return True
        table = ResultTable(f"Hello Details: {title}").set_preserve_order(True)
        table.set_table("hello")
        try:
            table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
        except Exception:
            pass
        table.set_source_command("download-file", [])
        results_payload: List[Dict[str, Any]] = []
        # Fabricate two child rows ("part a"/"part b") under the selection.
        for part in ("a", "b"):
            file_title = f"{title} - part {part}"
            file_path = f"{target.get('path')}/{part}"
            sr = SearchResult(
                table="hello",
                title=file_title,
                path=file_path,
                detail=f"Part {part}",
                media_kind="file",
                columns=[("Part", part)],
                full_metadata={"part": part, "example_index": idx},
            )
            table.add_result(sr)
            try:
                results_payload.append(sr.to_dict())
            except Exception:
                results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})
        # Register the table with the CLI context; best-effort on both calls.
        try:
            ctx.set_last_result_table(table, results_payload)
            ctx.set_current_stage_table(table)
        except Exception:
            pass
        try:
            stdout_console().print()
            stdout_console().print(table)
        except Exception:
            pass
        return True

View File

@@ -1224,6 +1224,9 @@ class LibgenSearch:
if results:
_call(log_info, f"[libgen] Using mirror: {mirror}")
return results
else:
_call(log_info, f"[libgen] Mirror returned 0 results; stopping mirror fallback")
break
except requests.exceptions.Timeout:
_call(log_info, f"[libgen] Mirror timed out: {mirror}")
continue

View File

@@ -304,7 +304,7 @@ class PodcastIndex(Provider):
pass
try:
from SYS.download import _download_direct_file
from API.HTTP import _download_direct_file
except Exception:
return True

442
Provider/torrent.py Normal file
View File

@@ -0,0 +1,442 @@
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import requests
from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log
try: # Preferred HTML parser
from lxml import html as lxml_html
except Exception: # pragma: no cover - optional
lxml_html = None # type: ignore
logger = logging.getLogger(__name__)
@dataclass
class TorrentInfo:
    """A single torrent listing, normalized across the different scrapers.

    ``magnet`` may be None when the listing page only exposes a detail URL;
    consumers fall back to ``url`` in that case.
    """

    name: str  # display title as scraped
    url: str  # detail-page URL (or site-specific identifier for YTS)
    seeders: int
    leechers: int
    size: str  # human-readable size string as scraped (e.g. "1.4 GB")
    source: str  # originating scraper name, e.g. "nyaa.si"
    category: Optional[str] = None
    uploader: Optional[str] = None
    magnet: Optional[str] = None
@dataclass
class SearchParams:
    """Search request passed to every scraper via ``Scraper.find``."""

    name: str  # the query string
    category: Optional[str] = None  # site-specific category code, if any
    order_column: Optional[str] = None  # sort column, e.g. "seeders"
    order_ascending: bool = False  # sort direction; descending by default
# Matches strings that begin with "magnet" (case-insensitive).
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)


class Scraper:
    """Base class for site-specific torrent scrapers.

    Subclasses override ``_request_data`` (build URL + query payload) and
    ``_parse_search`` (turn the HTTP response into ``TorrentInfo`` rows);
    optionally ``_parse_detail_response`` to pull a magnet link off a
    detail page. All network errors are swallowed and logged via ``debug``.
    """

    def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
        self.name = name
        self.base = base_url.rstrip("/")
        self.timeout = timeout
        # Browser-like UA; some sites reject the default requests UA.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
            )
        }
        # Set by find(); read by _request_data in subclasses.
        self.params: Optional[SearchParams] = None

    def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
        """Run the search described by *params* across up to *pages* pages."""
        self.params = params
        collected: List[TorrentInfo] = []
        last_page = max(1, pages)
        for page_no in range(1, last_page + 1):
            try:
                collected.extend(self._get_page(page_no))
            except Exception as exc:
                # A bad page never aborts the whole search.
                debug(f"[{self.name}] page fetch failed: {exc}")
        return collected

    def _get_page(self, page: int) -> List[TorrentInfo]:
        """Fetch and parse one results page; [] on any failure."""
        url, payload = self._request_data(page)
        try:
            response = requests.get(
                url,
                params=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return self._parse_search(response)
        except Exception as exc:
            debug(f"[{self.name}] request failed: {exc}")
            return []

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        # Default: base URL, no query parameters; subclasses override.
        return self.base, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:  # pragma: no cover - interface
        raise NotImplementedError

    def _parse_detail(self, url: str) -> Optional[str]:  # optional override
        """Fetch a detail page and extract a magnet link; None on failure."""
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            return self._parse_detail_response(response)
        except Exception:
            return None

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:  # pragma: no cover - interface
        return None

    @staticmethod
    def _int_from_text(value: Any) -> int:
        """Best-effort integer parse; None / junk / '' all become 0."""
        try:
            cleaned = str(value).strip().replace(",", "")
            return int(cleaned)
        except Exception:
            return 0
class NyaaScraper(Scraper):
    """Scraper for nyaa.si (server-rendered HTML, parsed with lxml)."""

    def __init__(self) -> None:
        super().__init__("nyaa.si", "https://nyaa.si")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the listing URL and query payload for *page*."""
        params = self.params or SearchParams(name="")
        payload = {
            "p": page,
            "q": params.name,
            "c": params.category or "0_0",  # "0_0" presumably means all categories — TODO confirm
            "f": "0",
        }
        if params.order_column:
            payload["s"] = params.order_column
            payload["o"] = "asc" if params.order_ascending else "desc"
        return f"{self.base}/", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Extract TorrentInfo rows from the HTML listing table."""
        # Without lxml we cannot parse the HTML at all.
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            # Rows with fewer than 7 cells are headers/ads; skip them.
            if len(cells) < 7:
                continue
            # Positional cell layout; trailing cells (if any) are ignored.
            category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
            name_links = name_cell.xpath("./a")
            # Prefer the second anchor when present (the first is presumably
            # an auxiliary link, e.g. comments) — verify against live DOM.
            name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
            if name_tag is None:
                continue
            name = name_tag.get("title") or (name_tag.text_content() or "").strip()
            url = name_tag.get("href") or ""
            magnet_link = None
            magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
            if magnet_candidates:
                magnet_link = magnet_candidates[0]
            category_title = None
            cat_titles = category_cell.xpath(".//a/@title")
            if cat_titles:
                category_title = cat_titles[0]
            results.append(
                TorrentInfo(
                    name=name,
                    # Listing hrefs are site-relative; prefix the base URL.
                    url=f"{self.base}{url}",
                    seeders=self._int_from_text(seed_cell.text_content()),
                    leechers=self._int_from_text(leech_cell.text_content()),
                    size=(size_cell.text_content() or "").strip(),
                    source=self.name,
                    category=category_title,
                    magnet=magnet_link,
                )
            )
        return results
class X1337Scraper(Scraper):
    """Scraper for 1337x.to (path-based search URLs, lxml HTML parsing)."""

    def __init__(self) -> None:
        super().__init__("1337x.to", "https://1337x.to")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the search URL for *page*; 1337x encodes everything in the path."""
        params = self.params or SearchParams(name="")
        order = None
        if params.order_column:
            direction = "asc" if params.order_ascending else "desc"
            order = f"{params.order_column}/{direction}"
        category = params.category
        name = requests.utils.quote(params.name)
        # Four distinct URL shapes depending on which options are set.
        if order and category:
            path = f"/sort-category-search/{name}/{category}/{order}"
        elif category:
            path = f"/category-search/{name}/{category}"
        elif order:
            path = f"/sort-search/{name}/{order}"
        else:
            path = f"/search/{name}"
        url = f"{self.base}{path}/{page}/"
        return url, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Extract TorrentInfo rows from the HTML results table."""
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 6:
                continue
            # NOTE(review): strict 6-way unpack raises on rows with MORE than
            # 6 cells (caught upstream in _get_page, dropping the whole page);
            # confirm the live table always has exactly six columns.
            name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell = cells
            links = name_cell.xpath(".//a")
            # First anchor is presumably the category icon; the second holds
            # the torrent title/detail link — verify against live DOM.
            if len(links) < 2:
                continue
            torrent_path = links[1].get("href")
            torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
            info = TorrentInfo(
                name=(links[1].text_content() or "").strip(),
                url=torrent_url,
                seeders=self._int_from_text(seeds_cell.text_content()),
                leechers=self._int_from_text(leech_cell.text_content()),
                size=(size_cell.text_content() or "").strip().replace(",", ""),
                source=self.name,
                uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
            )
            # Listing pages carry no magnet; fetch it from the detail page
            # (one extra HTTP request per row).
            if not info.magnet:
                info.magnet = self._parse_detail(info.url)
            results.append(info)
        return results

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
        """Pull the first magnet link off a torrent detail page."""
        if lxml_html is None:
            return None
        doc = lxml_html.fromstring(response.text)
        links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
        return links[0] if links else None
class YTSScraper(Scraper):
    """Scraper for the YTS movie JSON API (no HTML parsing needed)."""

    # Tracker list for constructed magnets, pre-joined with "&tr=" so the
    # final magnet reads "...&tr=t1&tr=t2&tr=t3" (see _build_magnet).
    TRACKERS = "&tr=".join(
        [
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        ]
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        # JSON API: drop the browser User-Agent set by the base class.
        self.headers = {}

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the list_movies.json endpoint URL and query payload."""
        params = self.params or SearchParams(name="")
        payload = {
            "limit": 50,
            "page": page,
            "query_term": params.name,
            "sort_by": "seeds",
            "order_by": "desc" if not params.order_ascending else "asc",
        }
        return f"{self.base}/list_movies.json", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Convert the JSON movie list into TorrentInfo rows."""
        results: List[TorrentInfo] = []
        data = response.json()
        if data.get("status") != "ok":
            return results
        movies = (data.get("data") or {}).get("movies") or []
        for movie in movies:
            torrents = movie.get("torrents") or []
            if not torrents:
                continue
            # A movie can have multiple releases; keep the best-seeded one.
            tor = max(torrents, key=lambda t: t.get("seeds", 0))
            name = movie.get("title") or "unknown"
            info = TorrentInfo(
                name=name,
                # YTS has no stable detail URL here; store the numeric id.
                url=str(movie.get("id") or ""),
                seeders=int(tor.get("seeds", 0) or 0),
                leechers=int(tor.get("peers", 0) or 0),
                size=str(tor.get("size") or ""),
                source=self.name,
                category=(movie.get("genres") or [None])[0],
                magnet=self._build_magnet(tor, name),
            )
            results.append(info)
        return results

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        """Assemble a magnet URI from the torrent hash, title, and trackers."""
        return (
            f"magnet:?xt=urn:btih:{torrent.get('hash')}"
            f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
        )
class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone)."""

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the q.php endpoint URL; the API has no pagination."""
        _ = page  # single-page API
        current = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": current.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Convert the JSON array of listings into TorrentInfo rows."""
        found: List[TorrentInfo] = []
        try:
            payload = response.json()
        except Exception:
            return found
        if not isinstance(payload, list):
            return found
        for entry in payload:
            if not isinstance(entry, dict):
                continue
            title = str(entry.get("name") or "").strip()
            digest = str(entry.get("info_hash") or "").strip()
            # Rows without a name or info hash are unusable.
            if not title or not digest:
                continue
            raw_size = str(entry.get("size") or "").strip()
            found.append(
                TorrentInfo(
                    name=title,
                    url=f"{self.base}/description.php?id={entry.get('id')}",
                    seeders=self._int_from_text(entry.get("seeders")),
                    leechers=self._int_from_text(entry.get("leechers")),
                    size=self._format_size(raw_size),
                    source=self.name,
                    category=str(entry.get("category") or ""),
                    uploader=str(entry.get("username") or ""),
                    magnet=self._build_magnet(digest, title),
                )
            )
        return found

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        """Assemble a tracker-less magnet URI from hash and display name."""
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a raw byte count as "N.N GB"/"N.N MB"; junk passes through."""
        try:
            nbytes = int(size_raw)
        except Exception:
            return size_raw
        if nbytes <= 0:
            return size_raw
        if nbytes >= 1024 ** 3:
            return f"{nbytes / (1024 ** 3):.1f} GB"
        return f"{nbytes / (1024 ** 2):.1f} MB"
class Torrent(Provider):
    """Meta-provider that fans a query out across several torrent scrapers."""

    # Rows in the "torrent" table auto-advance into these pipeline stages.
    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.scrapers: List[Scraper] = []
        # JSON APIs (no lxml dependency)
        self.scrapers.append(ApiBayScraper())
        self.scrapers.append(YTSScraper())
        # HTML scrapers require lxml
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            # NOTE(review): file=None is unusual — other call sites in this
            # codebase pass file=sys.stderr; confirm SYS.logger.log treats
            # None as its default destination.
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        # Usable as long as at least one scraper was constructed.
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        """Query every scraper, merge results sorted by seeders, and wrap
        them as `SearchResult` rows (magnet preferred over detail URL)."""
        q = str(query or "").strip()
        if not q:
            return []
        params = SearchParams(name=q, order_column="seeders", order_ascending=False)
        results: List[TorrentInfo] = []
        for scraper in self.scrapers:
            try:
                scraped = scraper.find(params, pages=1)
                results.extend(scraped)
            except Exception as exc:
                # One failing scraper never aborts the whole search.
                debug(f"[torrent] scraper {scraper.name} failed: {exc}")
                continue
        # Global ranking across all sources: most-seeded first.
        results = sorted(results, key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            results = results[:limit]
        out: List[SearchResult] = []
        for item in results:
            # Prefer the magnet as the row's path; fall back to the page URL.
            path = item.magnet or item.url
            columns = [
                ("TITLE", item.name),
                ("Seeds", str(item.seeders)),
                ("Leechers", str(item.leechers)),
                ("Size", item.size or ""),
                ("Source", item.source),
            ]
            if item.uploader:
                columns.append(("Uploader", item.uploader))
            md = {
                "magnet": item.magnet,
                "url": item.url,
                "source": item.source,
                "seeders": item.seeders,
                "leechers": item.leechers,
                "size": item.size,
            }
            if item.uploader:
                md["uploader"] = item.uploader
            out.append(
                SearchResult(
                    table="torrent",
                    title=item.name,
                    path=path,
                    detail=f"Seeds:{item.seeders} | Size:{item.size}",
                    annotations=[item.source],
                    media_kind="other",
                    columns=columns,
                    full_metadata=md,
                    tag={"torrent"},
                )
            )
        return out

185
Provider/vimm.py Normal file
View File

@@ -0,0 +1,185 @@
"""Vimm provider skeleton (lxml + HTTPClient).
This is a lightweight, resilient provider implementation intended as a
starting point for implementing a full Vimm (vimm.net) provider.
It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin, quote_plus
from lxml import html as lxml_html
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required
    """

    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    def validate(self) -> bool:
        # This provider has no required config; consider more checks if needed.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Parse a human-readable size ("1.5 GB") into bytes; None on failure.

        Unrecognized or missing units default to bytes. Note the regex is
        unanchored, so any leading number in the string will match.
        """
        if not size_str:
            return None
        try:
            s = str(size_str or "").strip().replace(",", "")
            m = re.search(r"(?P<val>[\d\.]+)\s*(?P<unit>[KMGT]?B)?", s, flags=re.I)
            if not m:
                return None
            val = float(m.group("val"))
            unit = (m.group("unit") or "B").upper()
            mul = {
                "B": 1,
                "KB": 1024,
                "MB": 1024 ** 2,
                "GB": 1024 ** 3,
                "TB": 1024 ** 4,
            }.get(unit, 1)
            return int(val * mul)
        except Exception:
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing and return heuristically parsed rows.

        Fetches the server-rendered list page, then tries a cascade of
        permissive XPath selectors to locate result nodes; per-node
        extraction is best-effort and skips anything that fails.
        """
        q = (query or "").strip()
        if not q:
            return []
        # Build search/list URL
        base = "https://vimm.net/vault/"
        url = f"{base}?p=list&q={quote_plus(q)}"
        try:
            with HTTPClient(timeout=20.0) as client:
                resp = client.get(url)
                content = resp.content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []
        try:
            doc = lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []
        results: List[SearchResult] = []
        # Candidate XPaths for list items (tweak to match real DOM)
        container_xpaths = [
            '//div[contains(@class,"list-item")]',
            '//div[contains(@class,"result")]',
            '//li[contains(@class,"item")]',
            '//tr[contains(@class,"result")]',
            '//article',
        ]
        nodes = []
        # First selector that yields any nodes wins.
        for xp in container_xpaths:
            try:
                found = doc.xpath(xp)
                if found:
                    nodes = found
                    debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
                    break
            except Exception:
                continue
        # Fallback: try generic anchors under a list area
        if not nodes:
            try:
                nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div')
            except Exception:
                nodes = []
        for n in (nodes or [])[: max(1, int(limit))]:
            try:
                # Prefer explicit title anchors
                title = None
                href = None
                try:
                    # a few heuristic searches for a meaningful anchor
                    a = (n.xpath('.//a[contains(@class,"title")]') or
                         n.xpath('.//h2/a') or
                         n.xpath('.//a[contains(@href,"/vault/")]') or
                         n.xpath('.//a'))
                    if a:
                        a0 = a[0]
                        title = a0.text_content().strip()
                        href = a0.get('href')
                except Exception:
                    title = None
                    href = None
                # Last resort: use the node's full text as the title.
                if not title:
                    title = (n.text_content() or "").strip()
                path = urljoin(base, href) if href else ""
                # Extract size & platform heuristics
                size_text = ""
                try:
                    s = n.xpath('.//*[contains(@class,"size")]/text()') or n.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()')
                    if s:
                        size_text = str(s[0]).strip()
                except Exception:
                    size_text = ""
                size_bytes = self._parse_size_bytes(size_text)
                platform = ""
                try:
                    p = n.xpath('.//*[contains(@class,"platform")]/text()')
                    if p:
                        platform = str(p[0]).strip()
                except Exception:
                    platform = ""
                columns = []
                if platform:
                    columns.append(("Platform", platform))
                if size_text:
                    columns.append(("Size", size_text))
                results.append(
                    SearchResult(
                        table="vimm",
                        title=str(title or "").strip(),
                        path=str(path or ""),
                        detail="",
                        annotations=[],
                        media_kind="file",
                        size_bytes=size_bytes,
                        tag={"vimm"},
                        columns=columns,
                        # Keep the raw node HTML so selectors can be refined later.
                        full_metadata={"raw": lxml_html.tostring(n, encoding="unicode")},
                    )
                )
            except Exception:
                # Any per-node failure skips just that node.
                continue
        return results[: max(0, int(limit))]