df
This commit is contained in:
345
Provider/HIFI.py
345
Provider/HIFI.py
@@ -1,12 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import time
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from API.hifi import HifiApiClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import debug, log
|
||||
@@ -733,6 +736,10 @@ class HIFI(Provider):
|
||||
input_ref: str,
|
||||
output_path: Path,
|
||||
lossless_fallback: bool = True,
|
||||
progress: Optional[Any] = None,
|
||||
transfer_label: Optional[str] = None,
|
||||
duration_seconds: Optional[int] = None,
|
||||
audio_quality: Optional[str] = None,
|
||||
) -> Optional[Path]:
|
||||
ffmpeg_path = self._find_ffmpeg()
|
||||
if not ffmpeg_path:
|
||||
@@ -749,20 +756,115 @@ class HIFI(Provider):
|
||||
|
||||
protocol_whitelist = "file,https,http,tcp,tls,crypto,data"
|
||||
|
||||
def _run(cmd: List[str]) -> bool:
|
||||
label = str(transfer_label or output_path.name or "hifi")
|
||||
|
||||
def _estimate_total_bytes() -> Optional[int]:
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
dur = int(duration_seconds) if duration_seconds is not None else None
|
||||
except Exception:
|
||||
dur = None
|
||||
if not dur or dur <= 0:
|
||||
return None
|
||||
|
||||
qual = str(audio_quality or "").strip().lower()
|
||||
# Rough per-quality bitrate guess (bytes/sec).
|
||||
if qual in {"hi_res",
|
||||
"hi_res_lossless",
|
||||
"hires",
|
||||
"hi-res",
|
||||
"master",
|
||||
"mqa"}:
|
||||
bps = 4_608_000 # ~24-bit/96k stereo
|
||||
elif qual in {"lossless",
|
||||
"flac"}:
|
||||
bps = 1_411_200 # 16-bit/44.1k stereo
|
||||
else:
|
||||
bps = 320_000 # kbps for compressed
|
||||
|
||||
try:
|
||||
return int((bps / 8.0) * dur)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
est_total_bytes = _estimate_total_bytes()
|
||||
|
||||
def _update_transfer(total_bytes_val: Optional[int]) -> None:
|
||||
if progress is None:
|
||||
return
|
||||
try:
|
||||
progress.update_transfer(
|
||||
label=label,
|
||||
completed=int(total_bytes_val) if total_bytes_val is not None else None,
|
||||
total=est_total_bytes,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _run(cmd: List[str], *, target_path: Optional[Path] = None) -> bool:
|
||||
cmd_progress = list(cmd)
|
||||
# Enable ffmpeg progress output for live byte updates.
|
||||
cmd_progress.insert(1, "-progress")
|
||||
cmd_progress.insert(2, "pipe:1")
|
||||
cmd_progress.insert(3, "-nostats")
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
cmd_progress,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
if proc.returncode == 0 and self._has_nonempty_file(output_path):
|
||||
return True
|
||||
if proc.stderr:
|
||||
debug(f"[hifi] ffmpeg failed: {proc.stderr.strip()}")
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] ffmpeg invocation failed: {exc}")
|
||||
return False
|
||||
|
||||
last_bytes = None
|
||||
try:
|
||||
while True:
|
||||
line = proc.stdout.readline() if proc.stdout else ""
|
||||
if not line:
|
||||
if proc.poll() is not None:
|
||||
break
|
||||
time.sleep(0.05)
|
||||
continue
|
||||
|
||||
if "=" not in line:
|
||||
continue
|
||||
key, val = line.strip().split("=", 1)
|
||||
if key == "total_size":
|
||||
try:
|
||||
last_bytes = int(val)
|
||||
_update_transfer(last_bytes)
|
||||
except Exception:
|
||||
pass
|
||||
elif key == "out_time_ms":
|
||||
# Map out_time_ms to byte estimate when total_size missing.
|
||||
try:
|
||||
if est_total_bytes and val.isdigit():
|
||||
ms = int(val)
|
||||
dur_ms = (duration_seconds or 0) * 1000
|
||||
if dur_ms > 0:
|
||||
pct = min(1.0, max(0.0, ms / dur_ms))
|
||||
approx = int(est_total_bytes * pct)
|
||||
_update_transfer(approx)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
proc.wait()
|
||||
finally:
|
||||
if last_bytes is not None:
|
||||
_update_transfer(last_bytes)
|
||||
|
||||
check_path = target_path or output_path
|
||||
if proc.returncode == 0 and self._has_nonempty_file(check_path):
|
||||
return True
|
||||
|
||||
try:
|
||||
stderr_text = proc.stderr.read() if proc.stderr else ""
|
||||
if stderr_text:
|
||||
debug(f"[hifi] ffmpeg failed: {stderr_text.strip()}")
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
# Prefer remux (fast, no transcode).
|
||||
@@ -816,25 +918,14 @@ class HIFI(Provider):
|
||||
"flac",
|
||||
str(tmp_flac_path),
|
||||
]
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd_flac,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if proc.returncode == 0 and self._has_nonempty_file(tmp_flac_path):
|
||||
if tmp_flac_path != flac_path:
|
||||
try:
|
||||
tmp_flac_path.replace(flac_path)
|
||||
except Exception:
|
||||
# If rename fails, still return the temp file.
|
||||
return tmp_flac_path
|
||||
return flac_path
|
||||
if proc.stderr:
|
||||
debug(f"[hifi] ffmpeg flac fallback failed: {proc.stderr.strip()}")
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] ffmpeg flac fallback invocation failed: {exc}")
|
||||
if _run(cmd_flac, target_path=tmp_flac_path) and self._has_nonempty_file(tmp_flac_path):
|
||||
if tmp_flac_path != flac_path:
|
||||
try:
|
||||
tmp_flac_path.replace(flac_path)
|
||||
except Exception:
|
||||
# If rename fails, still return the temp file.
|
||||
return tmp_flac_path
|
||||
return flac_path
|
||||
return None
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
@@ -921,7 +1012,14 @@ class HIFI(Provider):
|
||||
# If resolve_tidal_manifest_path returned a URL, prefer feeding it directly to ffmpeg.
|
||||
if resolved_text.lower().startswith("http"):
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=resolved_text,
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
if materialized is not None:
|
||||
return materialized
|
||||
|
||||
@@ -947,7 +1045,14 @@ class HIFI(Provider):
|
||||
if source_path.is_file() and source_path.suffix.lower() == ".mpd":
|
||||
# Materialize audio from the local MPD.
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=str(source_path), output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=str(source_path),
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
if materialized is not None:
|
||||
return materialized
|
||||
return None
|
||||
@@ -965,7 +1070,14 @@ class HIFI(Provider):
|
||||
|
||||
# As a last resort, attempt to treat the local path as an ffmpeg input.
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=resolved_text,
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
return materialized
|
||||
|
||||
def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]:
|
||||
@@ -1228,6 +1340,38 @@ class HIFI(Provider):
|
||||
minutes, secs = divmod(total, 60)
|
||||
return f"{minutes}:{secs:02d}"
|
||||
|
||||
@staticmethod
|
||||
def _coerce_duration_seconds(value: Any) -> Optional[int]:
|
||||
candidates = []
|
||||
candidates.append(value)
|
||||
try:
|
||||
if isinstance(value, dict):
|
||||
for key in ("duration",
|
||||
"durationSeconds",
|
||||
"duration_sec",
|
||||
"duration_ms",
|
||||
"durationMillis"):
|
||||
if key in value:
|
||||
candidates.append(value.get(key))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for cand in candidates:
|
||||
try:
|
||||
if cand is None:
|
||||
continue
|
||||
if isinstance(cand, str) and cand.strip().endswith("ms"):
|
||||
cand = cand.strip()[:-2]
|
||||
v = float(cand)
|
||||
if v <= 0:
|
||||
continue
|
||||
if v > 10_000: # treat as milliseconds
|
||||
v = v / 1000.0
|
||||
return int(round(v))
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _stringify(value: Any) -> str:
|
||||
text = str(value or "").strip()
|
||||
@@ -1305,23 +1449,18 @@ class HIFI(Provider):
|
||||
if audio_quality:
|
||||
columns.append(("Quality", audio_quality))
|
||||
|
||||
tags = {"tidal"}
|
||||
if audio_quality:
|
||||
tags.add(f"quality:{audio_quality.lower()}")
|
||||
metadata = item.get("mediaMetadata")
|
||||
if isinstance(metadata, dict):
|
||||
tag_values = metadata.get("tags") or []
|
||||
for tag in tag_values:
|
||||
if isinstance(tag, str) and tag.strip():
|
||||
tags.add(tag.strip().lower())
|
||||
|
||||
# IMPORTANT: do not retain a shared reference to the raw API dict.
|
||||
# Downstream playback (MPV) mutates metadata to cache the decoded Tidal
|
||||
# manifest path/URL. If multiple results share the same dict reference,
|
||||
# they can incorrectly collapse to a single playable target.
|
||||
full_md: Dict[str, Any] = dict(item)
|
||||
url_value = self._stringify(full_md.get("url"))
|
||||
if url_value:
|
||||
full_md["url"] = url_value
|
||||
|
||||
return SearchResult(
|
||||
tags = self._build_track_tags(full_md)
|
||||
|
||||
result = SearchResult(
|
||||
table="hifi",
|
||||
title=title,
|
||||
path=path,
|
||||
@@ -1332,6 +1471,12 @@ class HIFI(Provider):
|
||||
columns=columns,
|
||||
full_metadata=full_md,
|
||||
)
|
||||
if url_value:
|
||||
try:
|
||||
result.url = url_value
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _extract_track_selection_context(
|
||||
self, selected_items: List[Any]
|
||||
@@ -1401,6 +1546,9 @@ class HIFI(Provider):
|
||||
def _fetch_track_details(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
|
||||
info_data = self._fetch_track_info(track_id)
|
||||
|
||||
for base in self.api_urls:
|
||||
endpoint = f"{base.rstrip('/')}/track/"
|
||||
try:
|
||||
@@ -1408,12 +1556,32 @@ class HIFI(Provider):
|
||||
payload = client.track(track_id) if client else None
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
merged: Dict[str, Any] = {}
|
||||
if isinstance(info_data, dict):
|
||||
merged.update(info_data)
|
||||
merged.update(data)
|
||||
return merged
|
||||
except Exception as exc:
|
||||
log(f"[hifi] Track lookup failed for {endpoint}: {exc}", file=sys.stderr)
|
||||
continue
|
||||
return None
|
||||
|
||||
def _fetch_track_info(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
for base in self.api_urls:
|
||||
endpoint = f"{base.rstrip('/')}/info/"
|
||||
try:
|
||||
client = self._get_api_client_for_base(base)
|
||||
payload = client.info(track_id) if client else None
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] Info lookup failed for {endpoint}: {exc}")
|
||||
continue
|
||||
return None
|
||||
|
||||
def _fetch_track_lyrics(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
@@ -1450,6 +1618,54 @@ class HIFI(Provider):
|
||||
]
|
||||
return [(name, value) for name, value in values if value]
|
||||
|
||||
def _build_track_tags(self, metadata: Dict[str, Any]) -> set[str]:
|
||||
tags: set[str] = {"tidal"}
|
||||
|
||||
audio_quality = self._stringify(metadata.get("audioQuality"))
|
||||
if audio_quality:
|
||||
tags.add(f"quality:{audio_quality.lower()}")
|
||||
|
||||
media_md = metadata.get("mediaMetadata")
|
||||
if isinstance(media_md, dict):
|
||||
tag_values = media_md.get("tags") or []
|
||||
for tag in tag_values:
|
||||
if isinstance(tag, str):
|
||||
candidate = tag.strip()
|
||||
if candidate:
|
||||
tags.add(candidate.lower())
|
||||
|
||||
title_text = self._stringify(metadata.get("title"))
|
||||
if title_text:
|
||||
tags.add(f"title:{title_text}")
|
||||
|
||||
artists = self._extract_artists(metadata)
|
||||
for artist in artists:
|
||||
artist_clean = self._stringify(artist)
|
||||
if artist_clean:
|
||||
tags.add(f"artist:{artist_clean}")
|
||||
|
||||
album_title = ""
|
||||
album_obj = metadata.get("album")
|
||||
if isinstance(album_obj, dict):
|
||||
album_title = self._stringify(album_obj.get("title"))
|
||||
else:
|
||||
album_title = self._stringify(metadata.get("album"))
|
||||
if album_title:
|
||||
tags.add(f"album:{album_title}")
|
||||
|
||||
track_no_val = metadata.get("trackNumber") or metadata.get("track_number")
|
||||
if track_no_val is not None:
|
||||
try:
|
||||
track_int = int(track_no_val)
|
||||
if track_int > 0:
|
||||
tags.add(f"track:{track_int}")
|
||||
except Exception:
|
||||
track_text = self._stringify(track_no_val)
|
||||
if track_text:
|
||||
tags.add(f"track:{track_text}")
|
||||
|
||||
return tags
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
@@ -1476,16 +1692,32 @@ class HIFI(Provider):
|
||||
else None
|
||||
)
|
||||
|
||||
try:
|
||||
debug(
|
||||
f"[hifi.selector] table_type={table_type} stage_is_last={stage_is_last} selected_count={len(selected_items) if selected_items else 0}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Artist selection: selecting @N should open an albums list.
|
||||
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.artist":
|
||||
contexts = self._extract_artist_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] artist contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
artist_id, artist_name = contexts[0]
|
||||
album_results = self._albums_for_artist(artist_id=artist_id, artist_name=artist_name, limit=200)
|
||||
if not album_results:
|
||||
return False
|
||||
try:
|
||||
from SYS.rich_display import stdout_console
|
||||
stdout_console().print(f"[bold yellow][hifi] No albums found for {artist_name}[/]")
|
||||
except Exception:
|
||||
log(f"[hifi] No albums found for {artist_name}")
|
||||
return True
|
||||
|
||||
try:
|
||||
from SYS.rich_display import stdout_console
|
||||
@@ -1531,6 +1763,10 @@ class HIFI(Provider):
|
||||
# Album selection: selecting @N should open the track list for that album.
|
||||
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.album":
|
||||
contexts = self._extract_album_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] album contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
@@ -1605,6 +1841,10 @@ class HIFI(Provider):
|
||||
return False
|
||||
|
||||
contexts = self._extract_track_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] track contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
@@ -1657,6 +1897,9 @@ class HIFI(Provider):
|
||||
insert_pos = 2 if artist_display else 1
|
||||
columns.insert(insert_pos, ("Album", album_title))
|
||||
|
||||
tags = self._build_track_tags(detail)
|
||||
url_value = self._stringify(detail.get("url"))
|
||||
|
||||
result = SearchResult(
|
||||
table="hifi",
|
||||
title=title,
|
||||
@@ -1666,7 +1909,13 @@ class HIFI(Provider):
|
||||
media_kind="audio",
|
||||
columns=columns,
|
||||
full_metadata=detail,
|
||||
tag=tags,
|
||||
)
|
||||
if url_value:
|
||||
try:
|
||||
result.url = url_value
|
||||
except Exception:
|
||||
pass
|
||||
table.add_result(result)
|
||||
try:
|
||||
results_payload.append(result.to_dict())
|
||||
|
||||
@@ -8,12 +8,11 @@ from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from API.HTTP import HTTPClient, _download_direct_file
|
||||
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from ProviderCore.download import sanitize_filename
|
||||
from SYS.download import _download_direct_file
|
||||
from SYS.logger import log
|
||||
from SYS.logger import log, debug
|
||||
from SYS.models import DownloadError
|
||||
|
||||
_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60
|
||||
@@ -302,7 +301,7 @@ def _dispatch_alldebrid_magnet_search(
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr)
|
||||
debug(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download")
|
||||
|
||||
|
||||
def prepare_magnet(
|
||||
|
||||
192
Provider/hello_provider.py
Normal file
192
Provider/hello_provider.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""Example provider template for use as a starter kit.
|
||||
|
||||
This minimal provider demonstrates the typical hooks a provider may implement:
|
||||
- `validate()` to assert it's usable
|
||||
- `search()` to return `SearchResult` items
|
||||
- `download()` to persist a sample file (useful for local tests)
|
||||
|
||||
See `docs/provider_guide.md` for authoring guidance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
|
||||
|
||||
class HelloProvider(Provider):
|
||||
"""Very small example provider suitable as a template.
|
||||
|
||||
- Table name: `hello`
|
||||
- Usage: `search-file -provider hello "query"`
|
||||
- Selecting a row and piping into `download-file` will call `download()`.
|
||||
"""
|
||||
|
||||
URL = ("hello:",)
|
||||
URL_DOMAINS = ()
|
||||
|
||||
def validate(self) -> bool:
|
||||
# No configuration required; always available for testing/demo purposes.
|
||||
return True
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
q = (query or "").strip()
|
||||
results: List[SearchResult] = []
|
||||
if not q or q in {"*", "all", "list"}:
|
||||
q = "example"
|
||||
|
||||
# Emit up to `limit` tiny example results.
|
||||
n = min(max(1, int(limit)), 3)
|
||||
for i in range(1, n + 1):
|
||||
title = f"{q} sample {i}"
|
||||
path = f"https://example.org/{q}/{i}"
|
||||
sr = SearchResult(
|
||||
table="hello",
|
||||
title=title,
|
||||
path=path,
|
||||
detail="Example provider result",
|
||||
media_kind="file",
|
||||
columns=[("Example", "yes")],
|
||||
full_metadata={"example_index": i},
|
||||
)
|
||||
results.append(sr)
|
||||
|
||||
return results[: max(0, int(limit))]
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
"""Create a small text file to simulate a download.
|
||||
|
||||
This keeps the example self-contained (no network access required) and
|
||||
makes it straightforward to test provider behavior with `pytest`.
|
||||
"""
|
||||
try:
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
title = str(getattr(result, "title", "hello") or "hello").strip()
|
||||
safe = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in title)
|
||||
fname = f"{safe}.txt" if safe else "hello.txt"
|
||||
dest = Path(output_dir) / fname
|
||||
try:
|
||||
dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
|
||||
return dest
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any,
|
||||
) -> bool:
|
||||
"""Present a simple details table when a HelloProvider row is selected.
|
||||
|
||||
This demonstrates how providers can implement custom `@N` selection
|
||||
behavior by constructing a `ResultTable`, populating it with
|
||||
provider-specific rows, and instructing the CLI to show the table.
|
||||
"""
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
def _as_payload(item: Any) -> Dict[str, Any]:
|
||||
if isinstance(item, dict):
|
||||
return dict(item)
|
||||
try:
|
||||
if hasattr(item, "to_dict"):
|
||||
maybe = item.to_dict()
|
||||
if isinstance(maybe, dict):
|
||||
return maybe
|
||||
except Exception:
|
||||
pass
|
||||
payload: Dict[str, Any] = {}
|
||||
try:
|
||||
payload = {
|
||||
"title": getattr(item, "title", None),
|
||||
"path": getattr(item, "path", None),
|
||||
"table": getattr(item, "table", None),
|
||||
"annotations": getattr(item, "annotations", None),
|
||||
"media_kind": getattr(item, "media_kind", None),
|
||||
"full_metadata": getattr(item, "full_metadata", None),
|
||||
}
|
||||
except Exception:
|
||||
payload = {}
|
||||
return payload
|
||||
|
||||
chosen: List[Dict[str, Any]] = []
|
||||
for item in selected_items or []:
|
||||
payload = _as_payload(item)
|
||||
meta = payload.get("full_metadata") or {}
|
||||
if not isinstance(meta, dict):
|
||||
meta = {}
|
||||
idx = meta.get("example_index")
|
||||
if idx is None:
|
||||
continue
|
||||
title = str(payload.get("title") or payload.get("path") or "").strip() or f"hello-{idx}"
|
||||
chosen.append({"index": idx, "title": title, "path": payload.get("path")})
|
||||
|
||||
if not chosen:
|
||||
return False
|
||||
|
||||
target = chosen[0]
|
||||
idx = target.get("index")
|
||||
title = target.get("title") or f"hello-{idx}"
|
||||
|
||||
try:
|
||||
from SYS.result_table import ResultTable
|
||||
from SYS.rich_display import stdout_console
|
||||
except Exception:
|
||||
# If ResultTable isn't available, consider selection handled
|
||||
return True
|
||||
|
||||
table = ResultTable(f"Hello Details: {title}").set_preserve_order(True)
|
||||
table.set_table("hello")
|
||||
try:
|
||||
table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
table.set_source_command("download-file", [])
|
||||
|
||||
results_payload: List[Dict[str, Any]] = []
|
||||
for part in ("a", "b"):
|
||||
file_title = f"{title} - part {part}"
|
||||
file_path = f"{target.get('path')}/{part}"
|
||||
sr = SearchResult(
|
||||
table="hello",
|
||||
title=file_title,
|
||||
path=file_path,
|
||||
detail=f"Part {part}",
|
||||
media_kind="file",
|
||||
columns=[("Part", part)],
|
||||
full_metadata={"part": part, "example_index": idx},
|
||||
)
|
||||
table.add_result(sr)
|
||||
try:
|
||||
results_payload.append(sr.to_dict())
|
||||
except Exception:
|
||||
results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})
|
||||
|
||||
try:
|
||||
ctx.set_last_result_table(table, results_payload)
|
||||
ctx.set_current_stage_table(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
stdout_console().print()
|
||||
stdout_console().print(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
@@ -1224,6 +1224,9 @@ class LibgenSearch:
|
||||
if results:
|
||||
_call(log_info, f"[libgen] Using mirror: {mirror}")
|
||||
return results
|
||||
else:
|
||||
_call(log_info, f"[libgen] Mirror returned 0 results; stopping mirror fallback")
|
||||
break
|
||||
except requests.exceptions.Timeout:
|
||||
_call(log_info, f"[libgen] Mirror timed out: {mirror}")
|
||||
continue
|
||||
|
||||
@@ -304,7 +304,7 @@ class PodcastIndex(Provider):
|
||||
pass
|
||||
|
||||
try:
|
||||
from SYS.download import _download_direct_file
|
||||
from API.HTTP import _download_direct_file
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
442
Provider/torrent.py
Normal file
442
Provider/torrent.py
Normal file
@@ -0,0 +1,442 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import debug, log
|
||||
try: # Preferred HTML parser
|
||||
from lxml import html as lxml_html
|
||||
except Exception: # pragma: no cover - optional
|
||||
lxml_html = None # type: ignore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TorrentInfo:
|
||||
name: str
|
||||
url: str
|
||||
seeders: int
|
||||
leechers: int
|
||||
size: str
|
||||
source: str
|
||||
category: Optional[str] = None
|
||||
uploader: Optional[str] = None
|
||||
magnet: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchParams:
|
||||
name: str
|
||||
category: Optional[str] = None
|
||||
order_column: Optional[str] = None
|
||||
order_ascending: bool = False
|
||||
|
||||
|
||||
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
|
||||
self.name = name
|
||||
self.base = base_url.rstrip("/")
|
||||
self.timeout = timeout
|
||||
self.headers = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
|
||||
)
|
||||
}
|
||||
self.params: Optional[SearchParams] = None
|
||||
|
||||
def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
|
||||
self.params = params
|
||||
results: List[TorrentInfo] = []
|
||||
for page in range(1, max(1, pages) + 1):
|
||||
try:
|
||||
results.extend(self._get_page(page))
|
||||
except Exception as exc:
|
||||
debug(f"[{self.name}] page fetch failed: {exc}")
|
||||
return results
|
||||
|
||||
def _get_page(self, page: int) -> List[TorrentInfo]:
|
||||
url, payload = self._request_data(page)
|
||||
try:
|
||||
resp = requests.get(
|
||||
url,
|
||||
params=payload,
|
||||
headers=self.headers,
|
||||
timeout=self.timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return self._parse_search(resp)
|
||||
except Exception as exc:
|
||||
debug(f"[{self.name}] request failed: {exc}")
|
||||
return []
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
return self.base, {}
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]: # pragma: no cover - interface
|
||||
raise NotImplementedError
|
||||
|
||||
def _parse_detail(self, url: str) -> Optional[str]: # optional override
|
||||
try:
|
||||
resp = requests.get(url, headers=self.headers, timeout=self.timeout)
|
||||
resp.raise_for_status()
|
||||
return self._parse_detail_response(resp)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _parse_detail_response(self, response: requests.Response) -> Optional[str]: # pragma: no cover - interface
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _int_from_text(value: Any) -> int:
|
||||
try:
|
||||
return int(str(value).strip().replace(",", ""))
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
class NyaaScraper(Scraper):
|
||||
def __init__(self) -> None:
|
||||
super().__init__("nyaa.si", "https://nyaa.si")
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
params = self.params or SearchParams(name="")
|
||||
payload = {
|
||||
"p": page,
|
||||
"q": params.name,
|
||||
"c": params.category or "0_0",
|
||||
"f": "0",
|
||||
}
|
||||
if params.order_column:
|
||||
payload["s"] = params.order_column
|
||||
payload["o"] = "asc" if params.order_ascending else "desc"
|
||||
return f"{self.base}/", payload
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
if lxml_html is None:
|
||||
return []
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
rows = doc.xpath("//table//tbody/tr")
|
||||
results: List[TorrentInfo] = []
|
||||
for row in rows:
|
||||
cells = row.xpath("./td")
|
||||
if len(cells) < 7:
|
||||
continue
|
||||
category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
|
||||
|
||||
name_links = name_cell.xpath("./a")
|
||||
name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
|
||||
if name_tag is None:
|
||||
continue
|
||||
|
||||
name = name_tag.get("title") or (name_tag.text_content() or "").strip()
|
||||
url = name_tag.get("href") or ""
|
||||
|
||||
magnet_link = None
|
||||
magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
|
||||
if magnet_candidates:
|
||||
magnet_link = magnet_candidates[0]
|
||||
|
||||
category_title = None
|
||||
cat_titles = category_cell.xpath(".//a/@title")
|
||||
if cat_titles:
|
||||
category_title = cat_titles[0]
|
||||
|
||||
results.append(
|
||||
TorrentInfo(
|
||||
name=name,
|
||||
url=f"{self.base}{url}",
|
||||
seeders=self._int_from_text(seed_cell.text_content()),
|
||||
leechers=self._int_from_text(leech_cell.text_content()),
|
||||
size=(size_cell.text_content() or "").strip(),
|
||||
source=self.name,
|
||||
category=category_title,
|
||||
magnet=magnet_link,
|
||||
)
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
class X1337Scraper(Scraper):
|
||||
def __init__(self) -> None:
|
||||
super().__init__("1337x.to", "https://1337x.to")
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
params = self.params or SearchParams(name="")
|
||||
order = None
|
||||
if params.order_column:
|
||||
direction = "asc" if params.order_ascending else "desc"
|
||||
order = f"{params.order_column}/{direction}"
|
||||
|
||||
category = params.category
|
||||
name = requests.utils.quote(params.name)
|
||||
|
||||
if order and category:
|
||||
path = f"/sort-category-search/{name}/{category}/{order}"
|
||||
elif category:
|
||||
path = f"/category-search/{name}/{category}"
|
||||
elif order:
|
||||
path = f"/sort-search/{name}/{order}"
|
||||
else:
|
||||
path = f"/search/{name}"
|
||||
|
||||
url = f"{self.base}{path}/{page}/"
|
||||
return url, {}
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
if lxml_html is None:
|
||||
return []
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
rows = doc.xpath("//table//tbody/tr")
|
||||
results: List[TorrentInfo] = []
|
||||
for row in rows:
|
||||
cells = row.xpath("./td")
|
||||
if len(cells) < 6:
|
||||
continue
|
||||
name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell = cells
|
||||
|
||||
links = name_cell.xpath(".//a")
|
||||
if len(links) < 2:
|
||||
continue
|
||||
|
||||
torrent_path = links[1].get("href")
|
||||
torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
|
||||
|
||||
info = TorrentInfo(
|
||||
name=(links[1].text_content() or "").strip(),
|
||||
url=torrent_url,
|
||||
seeders=self._int_from_text(seeds_cell.text_content()),
|
||||
leechers=self._int_from_text(leech_cell.text_content()),
|
||||
size=(size_cell.text_content() or "").strip().replace(",", ""),
|
||||
source=self.name,
|
||||
uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
|
||||
)
|
||||
|
||||
if not info.magnet:
|
||||
info.magnet = self._parse_detail(info.url)
|
||||
results.append(info)
|
||||
return results
|
||||
|
||||
def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
    """Pull the first magnet URI out of a torrent detail page, if present."""
    if lxml_html is None:
        return None
    hrefs = lxml_html.fromstring(response.text).xpath(
        "//main//a[starts-with(@href,'magnet:')]/@href"
    )
    return next(iter(hrefs), None)
|
||||
|
||||
|
||||
class YTSScraper(Scraper):
    """Scraper for the yts.mx JSON movie API."""

    # Announce URLs embedded in every generated magnet link.
    TRACKERS = "&tr=".join(
        (
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        )
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        self.headers = {}

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return the list_movies endpoint plus its query payload."""
        search = self.params or SearchParams(name="")
        query = {
            "limit": 50,
            "page": page,
            "query_term": search.name,
            "sort_by": "seeds",
            "order_by": "asc" if search.order_ascending else "desc",
        }
        return f"{self.base}/list_movies.json", query

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Convert the JSON movie list into TorrentInfo entries."""
        found: List[TorrentInfo] = []
        payload = response.json()
        if payload.get("status") != "ok":
            return found

        for movie in (payload.get("data") or {}).get("movies") or []:
            candidates = movie.get("torrents") or []
            if not candidates:
                continue
            # Keep only the best-seeded release per movie.
            best = max(candidates, key=lambda t: t.get("seeds", 0))
            title = movie.get("title") or "unknown"
            found.append(
                TorrentInfo(
                    name=title,
                    url=str(movie.get("id") or ""),
                    seeders=int(best.get("seeds", 0) or 0),
                    leechers=int(best.get("peers", 0) or 0),
                    size=str(best.get("size") or ""),
                    source=self.name,
                    category=(movie.get("genres") or [None])[0],
                    magnet=self._build_magnet(best, title),
                )
            )
        return found

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        """Assemble a magnet URI from the torrent hash and display name."""
        prefix = f"magnet:?xt=urn:btih:{torrent.get('hash')}"
        return prefix + f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
|
||||
|
||||
|
||||
class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone)."""

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return the q.php endpoint and its payload (the API is single-page)."""
        _ = page  # single-page API
        params = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": params.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Convert the JSON array returned by q.php into TorrentInfo entries.

        Returns an empty list on malformed JSON or an unexpected payload shape.
        """
        results: List[TorrentInfo] = []
        try:
            data = response.json()
        except Exception:
            return results
        if not isinstance(data, list):
            return results

        for item in data:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            info_hash = str(item.get("info_hash") or "").strip()
            if not name or not info_hash:
                continue
            # BUGFIX: apibay signals "no matches" with a placeholder row
            # ("No results returned") whose info_hash is forty zeros; the
            # original emitted it as a real torrent with a bogus magnet.
            if not info_hash.strip("0"):
                continue

            magnet = self._build_magnet(info_hash, name)
            seeders = self._int_from_text(item.get("seeders"))
            leechers = self._int_from_text(item.get("leechers"))
            size_raw = str(item.get("size") or "").strip()
            size_fmt = self._format_size(size_raw)

            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}/description.php?id={item.get('id')}",
                    seeders=seeders,
                    leechers=leechers,
                    size=size_fmt,
                    source=self.name,
                    category=str(item.get("category") or ""),
                    uploader=str(item.get("username") or ""),
                    magnet=magnet,
                )
            )
        return results

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        """Build a trackerless magnet URI from the hash and display name."""
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a byte-count string as "N.N GB"/"N.N MB"; pass through on error."""
        try:
            size_int = int(size_raw)
            if size_int <= 0:
                return size_raw
            gb = size_int / (1024 ** 3)
            if gb >= 1:
                return f"{gb:.1f} GB"
            mb = size_int / (1024 ** 2)
            return f"{mb:.1f} MB"
        except Exception:
            return size_raw
|
||||
|
||||
|
||||
class Torrent(Provider):
    """Meta-provider that fans a query out across several torrent scrapers."""

    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        # JSON APIs (no lxml dependency)
        self.scrapers: List[Scraper] = [ApiBayScraper(), YTSScraper()]
        # HTML scrapers require lxml
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        """The provider is usable as long as at least one scraper loaded."""
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        """Run *query* through every scraper and merge results by seed count."""
        text = str(query or "").strip()
        if not text:
            return []

        params = SearchParams(name=text, order_column="seeders", order_ascending=False)
        gathered: List[TorrentInfo] = []
        for scraper in self.scrapers:
            try:
                gathered.extend(scraper.find(params, pages=1))
            except Exception as exc:
                # A failing site must not break the aggregated search.
                debug(f"[torrent] scraper {scraper.name} failed: {exc}")
                continue

        gathered.sort(key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            gathered = gathered[:limit]

        merged: List[SearchResult] = []
        for item in gathered:
            columns = [
                ("TITLE", item.name),
                ("Seeds", str(item.seeders)),
                ("Leechers", str(item.leechers)),
                ("Size", item.size or ""),
                ("Source", item.source),
            ]
            md = {
                "magnet": item.magnet,
                "url": item.url,
                "source": item.source,
                "seeders": item.seeders,
                "leechers": item.leechers,
                "size": item.size,
            }
            if item.uploader:
                columns.append(("Uploader", item.uploader))
                md["uploader"] = item.uploader

            merged.append(
                SearchResult(
                    table="torrent",
                    title=item.name,
                    # Prefer the magnet; fall back to the detail-page URL.
                    path=item.magnet or item.url,
                    detail=f"Seeds:{item.seeders} | Size:{item.size}",
                    annotations=[item.source],
                    media_kind="other",
                    columns=columns,
                    full_metadata=md,
                    tag={"torrent"},
                )
            )
        return merged
|
||||
185
Provider/vimm.py
Normal file
185
Provider/vimm.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""Vimm provider skeleton (lxml + HTTPClient).
|
||||
|
||||
This is a lightweight, resilient provider implementation intended as a
|
||||
starting point for implementing a full Vimm (vimm.net) provider.
|
||||
|
||||
It prefers server-rendered HTML parsing via lxml and uses the repo's
|
||||
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
|
||||
|
||||
Selectors in `search()` are intentionally permissive heuristics; update the
|
||||
XPaths to match the real site HTML when you have an actual fixture.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urljoin, quote_plus
|
||||
from lxml import html as lxml_html
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
|
||||
|
||||
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    NOTE(review): the XPaths below are permissive heuristics; confirm them
    against real vimm.net HTML before relying on this provider.
    """

    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    def validate(self) -> bool:
        # This provider has no required config; consider more checks if needed.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human-readable size (e.g. "1.2 GB") to bytes, or None."""
        if not size_str:
            return None
        try:
            cleaned = str(size_str or "").strip().replace(",", "")
            match = re.search(r"(?P<val>[\d\.]+)\s*(?P<unit>[KMGT]?B)?", cleaned, flags=re.I)
            if not match:
                return None
            amount = float(match.group("val"))
            scale = {
                "B": 1,
                "KB": 1024,
                "MB": 1024 ** 2,
                "GB": 1024 ** 3,
                "TB": 1024 ** 4,
            }.get((match.group("unit") or "B").upper(), 1)
            return int(amount * scale)
        except Exception:
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vimm.net vault and return heuristic SearchResults."""
        term = (query or "").strip()
        if not term:
            return []

        # Build search/list URL
        base = "https://vimm.net/vault/"
        target = f"{base}?p=list&q={quote_plus(term)}"

        try:
            with HTTPClient(timeout=20.0) as client:
                body = client.get(target).content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []

        try:
            doc = lxml_html.fromstring(body)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []

        found: List[SearchResult] = []

        # Candidate XPaths for list items (tweak to match real DOM)
        candidate_xpaths = (
            '//div[contains(@class,"list-item")]',
            '//div[contains(@class,"result")]',
            '//li[contains(@class,"item")]',
            '//tr[contains(@class,"result")]',
            '//article',
        )

        nodes = []
        for xp in candidate_xpaths:
            try:
                hits = doc.xpath(xp)
                if hits:
                    nodes = hits
                    debug(f"[vimm] using xpath {xp} -> {len(hits)} nodes")
                    break
            except Exception:
                continue

        # Fallback: try generic anchors under a list area
        if not nodes:
            try:
                nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div')
            except Exception:
                nodes = []

        for node in (nodes or [])[: max(1, int(limit))]:
            try:
                # Prefer explicit title anchors
                title = None
                href = None
                try:
                    # a few heuristic searches for a meaningful anchor
                    anchors = (node.xpath('.//a[contains(@class,"title")]') or
                               node.xpath('.//h2/a') or
                               node.xpath('.//a[contains(@href,"/vault/")]') or
                               node.xpath('.//a'))
                    if anchors:
                        first = anchors[0]
                        title = first.text_content().strip()
                        href = first.get('href')
                except Exception:
                    title = None
                    href = None

                if not title:
                    title = (node.text_content() or "").strip()

                link = urljoin(base, href) if href else ""

                # Extract size & platform heuristics
                size_text = ""
                try:
                    sizes = node.xpath('.//*[contains(@class,"size")]/text()') or node.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()')
                    if sizes:
                        size_text = str(sizes[0]).strip()
                except Exception:
                    size_text = ""

                size_bytes = self._parse_size_bytes(size_text)

                platform = ""
                try:
                    plats = node.xpath('.//*[contains(@class,"platform")]/text()')
                    if plats:
                        platform = str(plats[0]).strip()
                except Exception:
                    platform = ""

                columns = []
                if platform:
                    columns.append(("Platform", platform))
                if size_text:
                    columns.append(("Size", size_text))

                found.append(
                    SearchResult(
                        table="vimm",
                        title=str(title or "").strip(),
                        path=str(link or ""),
                        detail="",
                        annotations=[],
                        media_kind="file",
                        size_bytes=size_bytes,
                        tag={"vimm"},
                        columns=columns,
                        full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
                    )
                )
            except Exception:
                continue

        return found[: max(0, int(limit))]
|
||||
Reference in New Issue
Block a user