# Source file: Medios-Macina/Provider/vimm.py
#
# Original listing: 292 lines, 9.8 KiB, Python

"""Vimm provider skeleton (lxml + HTTPClient).
This is a lightweight, resilient provider implementation intended as a
starting point for implementing a full Vimm (vimm.net) provider.
It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin, quote_plus
from lxml import html as lxml_html
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    The XPath selectors below are permissive heuristics, not a faithful model
    of the site's markup; tighten them once a real HTML fixture is available.
    """

    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    # Used when ``limit`` is missing or not numeric; mirrors search()'s default.
    _DEFAULT_LIMIT = 50

    # Byte multipliers keyed by unit prefix ("" means plain bytes).
    _SIZE_MULTIPLIERS = {
        "": 1,
        "K": 1024,
        "M": 1024 ** 2,
        "G": 1024 ** 3,
        "T": 1024 ** 4,
    }
    # A number immediately followed by a unit: B, KB/MB/GB/TB or KiB/MiB/...
    _SIZE_WITH_UNIT_RE = re.compile(
        r"(?P<val>\d+(?:\.\d+)?|\.\d+)\s*(?P<prefix>[KMGT]?)i?B", re.I
    )
    # Any number at all; only consulted when no unit-bearing number exists.
    _BARE_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")

    # Candidate XPaths for list items, tried in order (tweak to match real DOM).
    _CONTAINER_XPATHS = (
        '//div[contains(@class,"list-item")]',
        '//div[contains(@class,"result")]',
        '//li[contains(@class,"item")]',
        '//tr[contains(@class,"result")]',
        '//article',
    )
    # Last-resort containers: direct children of a list/results area.
    _FALLBACK_XPATHS = (
        '//div[contains(@id,"list")]/div',
        '//div[contains(@class,"results")]/div',
    )
    # Anchors that may carry the entry title, most specific first.
    _ANCHOR_XPATHS = (
        './/a[contains(@class,"title")]',
        './/h2/a',
        './/a[contains(@href,"/vault/")]',
        './/a',
    )
    _SIZE_TEXT_XPATHS = (
        './/*[contains(@class,"size")]/text()',
        './/span[contains(text(),"MB") or contains(text(),"GB")]/text()',
    )
    _PLATFORM_TEXT_XPATHS = ('.//*[contains(@class,"platform")]/text()',)

    def validate(self) -> bool:
        """Return True; this provider has no required configuration."""
        # Consider more checks here if the provider grows config options.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human-readable size such as ``"1.5 GB"`` to bytes.

        A number that carries a unit wins over an earlier bare number, so
        ``"Disc 2 - 650 MB"`` is read as 650 MB rather than 2 bytes.  Units
        are case-insensitive and use 1024-based multipliers; the binary
        spellings (KiB, MiB, ...) are accepted as well.  A number without any
        unit is taken as a byte count.

        Returns:
            The size in bytes, or ``None`` when no number can be found.
        """
        if not size_str:
            return None
        try:
            # Drop thousands separators so "1,024 KB" parses as 1024 KB.
            text = str(size_str).strip().replace(",", "")
            with_unit = self._SIZE_WITH_UNIT_RE.search(text)
            if with_unit:
                factor = self._SIZE_MULTIPLIERS[with_unit.group("prefix").upper()]
                return int(float(with_unit.group("val")) * factor)
            bare = self._BARE_NUMBER_RE.search(text)
            if bare:
                return int(float(bare.group(0)))
            return None
        except (KeyError, ValueError, OverflowError):
            return None

    @staticmethod
    def _clean_text(text: Any) -> str:
        """Collapse every run of whitespace in *text* to a single space."""
        return " ".join(str(text or "").split())

    @staticmethod
    def _xpath(node: Any, expression: str) -> List[Any]:
        """Evaluate *expression* on *node*; a failing query counts as no match."""
        try:
            return node.xpath(expression)
        except Exception:
            # Best-effort scraping: one bad selector must not abort the search.
            return []

    def _first_text(self, node: Any, expressions: Any) -> str:
        """Return the first text hit among *expressions*, stripped, or ``""``."""
        for expression in expressions:
            hits = self._xpath(node, expression)
            if hits:
                return str(hits[0]).strip()
        return ""

    def _find_result_nodes(self, doc: Any) -> List[Any]:
        """Return the nodes of the first container selector that matches."""
        for expression in self._CONTAINER_XPATHS:
            found = self._xpath(doc, expression)
            if found:
                debug(f"[vimm] using xpath {expression} -> {len(found)} nodes")
                return list(found)
        # Fallback: generic children of a list/results area.
        for expression in self._FALLBACK_XPATHS:
            found = self._xpath(doc, expression)
            if found:
                return list(found)
        return []

    def _node_to_result(self, node: Any, base: str) -> Optional[SearchResult]:
        """Build a SearchResult from one list node.

        Returns ``None`` when the node yields neither a title nor a link,
        since such an entry would only render as a blank row.
        """
        title = ""
        href = None
        # Prefer explicit title anchors; the first selector that matches wins.
        for expression in self._ANCHOR_XPATHS:
            anchors = self._xpath(node, expression)
            if anchors:
                title = self._clean_text(anchors[0].text_content())
                href = anchors[0].get("href")
                break
        if not title:
            title = self._clean_text(node.text_content())
        path = urljoin(base, href) if href else ""
        if not title and not path:
            return None

        # Size and platform are heuristics; both are optional.
        size_text = self._first_text(node, self._SIZE_TEXT_XPATHS)
        platform = self._first_text(node, self._PLATFORM_TEXT_XPATHS)

        columns = []
        if platform:
            columns.append(("Platform", platform))
        if size_text:
            columns.append(("Size", size_text))

        return SearchResult(
            table="vimm",
            title=title,
            path=str(path or ""),
            detail="",
            annotations=[],
            media_kind="file",
            size_bytes=self._parse_size_bytes(size_text),
            tag={"vimm"},
            columns=columns,
            full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing for *query*.

        Args:
            query: Free-text search string; blank input yields no results.
            limit: Maximum number of results to return.  Values <= 0 yield an
                empty list; a missing or non-numeric value falls back to 50.
            filters: Accepted for interface compatibility; currently unused.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            Up to *limit* results.  HTTP and HTML-parse failures are logged to
            stderr and produce an empty list rather than an exception.
        """
        q = (query or "").strip()
        if not q:
            return []

        try:
            max_results = int(limit)
        except (TypeError, ValueError):
            max_results = self._DEFAULT_LIMIT
        if max_results <= 0:
            # Nothing can be returned, so skip the network round trip.
            return []

        # Build search/list URL
        base = self.URL[0]
        url = f"{base}?p=list&q={quote_plus(q)}"

        try:
            with HTTPClient(timeout=20.0) as client:
                resp = client.get(url)
                content = resp.content
        except Exception as exc:
            # Log and return empty results on failure. The HTTP client will
            # already attempt a certifi-based retry in common certificate
            # verification failure cases; if you still see cert errors, install
            # the `certifi` package or configure SSL_CERT_FILE to point at a
            # valid CA bundle.
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []

        try:
            doc = lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []

        results: List[SearchResult] = []
        for node in self._find_result_nodes(doc):
            if len(results) >= max_results:
                break
            try:
                item = self._node_to_result(node, base)
            except Exception as exc:
                # One malformed node must not discard the rest of the page.
                debug(f"[vimm] skipping result node: {exc}")
                continue
            if item is not None:
                results.append(item)
        return results
# Bridge into the ResultTable provider registry so vimm results can be rendered
# with the new provider/table/select API.
#
# The helpers are defined at module level and resolve ``ResultModel`` and the
# column factories at call time.  They are only registered (and therefore only
# invoked by the registry) once the guarded imports at the bottom succeeded.

# Metadata keys never promoted to table columns: the first three duplicate
# dedicated fields, and "raw" holds the serialized HTML node stored by search().
_SKIPPED_METADATA_KEYS = frozenset({"name", "title", "path", "raw"})


def _search_result_to_dict(sr):
    """Return a plain ``dict`` view of *sr*.

    Accepts objects exposing ``to_dict()``, plain dicts, and arbitrary objects
    (read via attributes).  If the conversion itself fails, a minimal dict
    holding only the title is returned.
    """
    try:
        if hasattr(sr, "to_dict"):
            return sr.to_dict()
        if isinstance(sr, dict):
            return sr
        return {
            "title": getattr(sr, "title", str(sr)),
            "path": getattr(sr, "path", None),
            "ext": getattr(sr, "ext", None),
            "size_bytes": getattr(sr, "size_bytes", None),
            "columns": getattr(sr, "columns", None),
            "full_metadata": getattr(sr, "full_metadata", None),
        }
    except Exception:
        return {"title": getattr(sr, "title", str(sr))}


def _guess_ext(path):
    """Return the extension implied by *path* without its dot, or ``None``.

    For URLs only the path component is inspected, so neither the host name
    ("https://vimm.net" -> no extension) nor the query string can be mistaken
    for a file suffix.
    """
    if not path:
        return None
    from pathlib import Path
    from urllib.parse import urlsplit

    text = str(path)
    try:
        if "://" in text:
            text = urlsplit(text).path
        suffix = Path(text).suffix
    except Exception:
        # Malformed URL or path: no extension can be inferred.
        return None
    return suffix.lstrip(".") or None


def _convert_search_result_to_model(sr):
    """Convert one provider result (SearchResult, dict or object) to a ResultModel."""
    d = _search_result_to_dict(sr)
    title = d.get("title") or ""
    path = d.get("path") or None

    # An extension supplied by the item wins; otherwise infer it from the path.
    ext = d.get("ext") or _guess_ext(path)
    if ext is not None:
        ext = str(ext).lstrip(".") or None

    size = d.get("size_bytes") or None
    if size is not None:
        try:
            size = int(size)
        except (TypeError, ValueError):
            # A malformed size must not abort conversion of the whole batch.
            size = None

    metadata = d.get("full_metadata") or d.get("metadata") or {}
    return ResultModel(
        title=str(title),
        path=str(path) if path is not None else None,
        ext=ext,
        size_bytes=size,
        metadata=metadata or {},
        source="vimm",
    )


def _adapter(items):
    """Lazily convert provider results into ResultModel rows."""
    for it in items:
        yield _convert_search_result_to_model(it)


def _discover_metadata_keys(rows, max_keys=2):
    """Return up to *max_keys* distinct metadata keys, in first-seen order."""
    seen = []
    for row in rows:
        meta = getattr(row, "metadata", None) or {}
        keys = meta.keys() if hasattr(meta, "keys") else ()
        for key in keys:
            if key in _SKIPPED_METADATA_KEYS or key in seen:
                continue
            seen.append(key)
            if len(seen) >= max_keys:
                return seen
    return seen


def _columns_factory(rows):
    """Choose table columns based on which fields the rows actually populate."""
    # Materialize once: rows is scanned several times and may be a generator.
    rows = list(rows or [])
    cols = [title_column()]
    if any(getattr(r, "ext", None) for r in rows):
        cols.append(ext_column())
    if any(getattr(r, "size_bytes", None) for r in rows):
        cols.append(metadata_column("size", "Size"))
    # Add up to 2 discovered metadata keys from rows
    cols.extend(metadata_column(k) for k in _discover_metadata_keys(rows))
    return cols


def _selection_fn(row):
    """Return the selector arguments for *row*: by path if set, else by title."""
    if getattr(row, "path", None):
        return ["-path", row.path]
    return ["-title", row.title or ""]


SAMPLE_ITEMS = [
    {"title": "Room of Awe", "path": "sample/Room of Awe", "ext": "zip", "size_bytes": 1024 * 1024 * 12, "full_metadata": {"platform": "PC"}},
    {"title": "Song of Joy", "path": "sample/Song of Joy.mp3", "ext": "mp3", "size_bytes": 5120000, "full_metadata": {"platform": "PC"}},
    {"title": "Cover Image", "path": "sample/Cover.jpg", "ext": "jpg", "size_bytes": 20480, "full_metadata": {}},
]

try:
    from SYS.result_table_adapters import register_provider
    from SYS.result_table_api import ResultModel
    from SYS.result_table_api import title_column, ext_column, metadata_column

    register_provider(
        "vimm",
        _adapter,
        columns=_columns_factory,
        selection_fn=_selection_fn,
        metadata={"description": "Vimm provider bridge (ProviderCore -> ResultTable API)"},
    )
except Exception:
    # Non-fatal: the bridge is best-effort.  The provider itself keeps working
    # when the ResultTable modules are missing or registration fails.
    pass