"""Vimm provider skeleton (lxml + HTTPClient).

This is a lightweight, resilient provider implementation intended as a
starting point for implementing a full Vimm (vimm.net) provider.

It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).

Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""

from __future__ import annotations

import re
import sys
from pathlib import PurePosixPath
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin, quote_plus, urlsplit

from lxml import html as lxml_html

from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug


class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    The XPath selectors below are permissive heuristics, not a description of
    the real vimm.net markup; tighten them once an HTML fixture is available.
    """

    # Single source of truth for the vault endpoint; `URL` and `search()` both
    # derive from it so they cannot drift apart.
    _BASE_URL = "https://vimm.net/vault/"

    URL = (_BASE_URL,)
    URL_DOMAINS = ("vimm.net",)

    # Used when the caller passes a `limit` that cannot be converted to int.
    _DEFAULT_LIMIT = 50
    _HTTP_TIMEOUT = 20.0

    # Number followed by an optional unit. Both decimal-style ("MB") and
    # IEC-style ("MiB") spellings are accepted; both are treated as powers of
    # 1024, matching the multiplier table below.
    _SIZE_RE = re.compile(r"(?P<val>[\d.]+)\s*(?P<unit>[KMGT]?i?B)?", re.I)
    _SIZE_MULTIPLIERS = {
        "B": 1,
        "KB": 1024,
        "MB": 1024 ** 2,
        "GB": 1024 ** 3,
        "TB": 1024 ** 4,
    }

    # Candidate XPaths for list items (tweak to match real DOM). Tried in
    # order; the first expression that matches anything wins.
    _CONTAINER_XPATHS = (
        '//div[contains(@class,"list-item")]',
        '//div[contains(@class,"result")]',
        '//li[contains(@class,"item")]',
        '//tr[contains(@class,"result")]',
        '//article',
    )
    # Fallback: generic children of a list/results area.
    _FALLBACK_XPATHS = (
        '//div[contains(@id,"list")]/div',
        '//div[contains(@class,"results")]/div',
    )
    # Heuristics for the anchor carrying the entry title, most specific first.
    _TITLE_ANCHOR_XPATHS = (
        './/a[contains(@class,"title")]',
        './/h2/a',
        './/a[contains(@href,"/vault/")]',
        './/a',
    )
    _SIZE_XPATHS = (
        './/*[contains(@class,"size")]/text()',
        './/span[contains(text(),"MB") or contains(text(),"GB")]/text()',
    )
    _PLATFORM_XPATHS = ('.//*[contains(@class,"platform")]/text()',)

    def validate(self) -> bool:
        """Return True: this provider has no required configuration."""
        # Consider more checks here if the provider grows config options.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human-readable size such as ``"1.5 GB"`` to bytes.

        Thousands separators (commas) are ignored and a missing unit means
        bytes. Returns ``None`` when the text is empty, contains no number, or
        the number cannot be converted.
        """
        if not size_str:
            return None

        text = str(size_str).strip().replace(",", "")
        match = self._SIZE_RE.search(text)
        if match is None:
            return None

        # "MiB" -> "MIB" -> "MB": IEC spellings share the 1024-based table.
        unit = (match.group("unit") or "B").upper().replace("I", "")
        multiplier = self._SIZE_MULTIPLIERS.get(unit, 1)
        try:
            # float() rejects tokens such as "1.2.3" or "."; int() rejects
            # an infinite float produced by an absurdly long digit run.
            return int(float(match.group("val")) * multiplier)
        except (ValueError, OverflowError):
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing and return up to ``limit`` results.

        Args:
            query: Free-text search string; blank queries return ``[]``.
            limit: Maximum number of results. Values ``<= 0`` return ``[]``
                without touching the network; values that cannot be
                converted to ``int`` fall back to the default of 50.
            filters: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A list of ``SearchResult`` objects. HTTP or HTML-parse failures
            are logged to stderr and yield an empty list instead of raising.
        """
        q = (query or "").strip()
        if not q:
            return []

        max_results = self._normalize_limit(limit)
        if max_results <= 0:
            return []

        # Build search/list URL
        url = f"{self._BASE_URL}?p=list&q={quote_plus(q)}"
        doc = self._fetch_document(url)
        if doc is None:
            return []

        results: List[SearchResult] = []
        for node in self._find_result_nodes(doc):
            # Count accepted results rather than pre-slicing the node list,
            # so unusable nodes do not eat into the caller's limit.
            if len(results) >= max_results:
                break
            try:
                result = self._node_to_result(node)
            except Exception as exc:
                # Best effort: one malformed node must not lose the others.
                debug(f"[vimm] skipping result node: {exc}")
                continue
            if result is not None:
                results.append(result)

        return results

    @classmethod
    def _normalize_limit(cls, limit: Any) -> int:
        """Coerce ``limit`` to int, using the default when that is impossible."""
        try:
            return int(limit)
        except (TypeError, ValueError):
            return cls._DEFAULT_LIMIT

    def _fetch_document(self, url: str) -> Optional[Any]:
        """Fetch ``url`` and parse it with lxml; return ``None`` on failure."""
        try:
            with HTTPClient(timeout=self._HTTP_TIMEOUT) as client:
                resp = client.get(url)
                content = resp.content
        except Exception as exc:
            # Log and return empty results on failure. The HTTP client will
            # already attempt a certifi-based retry in common certificate
            # verification failure cases; if you still see cert errors, install
            # the `certifi` package or configure SSL_CERT_FILE to point at a
            # valid CA bundle.
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return None

        try:
            return lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return None

    def _find_result_nodes(self, doc: Any) -> List[Any]:
        """Return the nodes matched by the first XPath candidate that hits."""
        for group in (self._CONTAINER_XPATHS, self._FALLBACK_XPATHS):
            for xp in group:
                try:
                    found = doc.xpath(xp)
                except Exception:
                    continue
                if found:
                    debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
                    return list(found)
        return []

    @staticmethod
    def _first_text(node: Any, xpaths: Any) -> str:
        """Return the stripped first hit of the first XPath that matches, else ''."""
        for xp in xpaths:
            try:
                hits = node.xpath(xp)
            except Exception:
                continue
            if hits:
                return str(hits[0]).strip()
        return ""

    def _node_to_result(self, node: Any) -> Optional[SearchResult]:
        """Build a ``SearchResult`` from one listing node.

        Returns ``None`` when the node yields neither a title nor a link,
        since such an entry could be neither displayed nor selected.
        """
        title = ""
        href = None
        # Prefer explicit title anchors; fall back to any anchor in the node.
        for xp in self._TITLE_ANCHOR_XPATHS:
            anchors = node.xpath(xp)
            if anchors:
                title = (anchors[0].text_content() or "").strip()
                href = anchors[0].get("href")
                break

        if not title:
            title = (node.text_content() or "").strip()

        path = urljoin(self._BASE_URL, href) if href else ""
        if not title and not path:
            return None

        # Extract size & platform heuristics
        size_text = self._first_text(node, self._SIZE_XPATHS)
        platform = self._first_text(node, self._PLATFORM_XPATHS)

        columns = []
        if platform:
            columns.append(("Platform", platform))
        if size_text:
            columns.append(("Size", size_text))

        return SearchResult(
            table="vimm",
            title=str(title or "").strip(),
            path=str(path or ""),
            detail="",
            annotations=[],
            media_kind="file",
            size_bytes=self._parse_size_bytes(size_text),
            tag={"vimm"},
            columns=columns,
            full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
        )


# Bridge into the ResultTable provider registry so vimm results can be rendered
# with the new provider/table/select API.
#
# The whole bridge is best-effort: if the ResultTable modules are missing or
# registration fails, the provider itself must still import and work.
try:
    from SYS.result_table_adapters import register_provider
    from SYS.result_table_api import ResultModel
    from SYS.result_table_api import title_column, ext_column, metadata_column

    # Metadata keys never promoted to table columns. "raw" is the serialized
    # HTML fragment the provider stores in `full_metadata`; it is not meant
    # for display.
    _HIDDEN_METADATA_KEYS = ("name", "title", "path", "raw")
    # Add up to this many discovered metadata keys as extra columns.
    _MAX_EXTRA_COLUMNS = 2

    def _search_result_to_dict(sr):
        """Return a plain dict view of ``sr`` (dict, ``to_dict()`` object, or other)."""
        if isinstance(sr, dict):
            return sr
        to_dict = getattr(sr, "to_dict", None)
        if callable(to_dict):
            try:
                data = to_dict()
            except Exception:
                data = None
            if isinstance(data, dict):
                return data
        # Last resort: read the attributes this bridge knows about.
        return {
            "title": getattr(sr, "title", str(sr)),
            "path": getattr(sr, "path", None),
            "size_bytes": getattr(sr, "size_bytes", None),
            "columns": getattr(sr, "columns", None),
            "full_metadata": getattr(sr, "full_metadata", None),
        }

    def _extension_of(path):
        """Return the file extension of ``path`` without the dot, or ``None``.

        ``path`` is usually a URL, so only its path component is inspected;
        otherwise a query string or host name ("vimm.net") would be mistaken
        for a file suffix.
        """
        if not path:
            return None
        try:
            url_path = urlsplit(str(path)).path
        except ValueError:
            # urlsplit rejects some malformed inputs (e.g. a broken IPv6 host).
            return None
        suffix = PurePosixPath(url_path.replace("\\", "/")).suffix
        return suffix.lstrip(".") or None

    def _convert_search_result_to_model(sr):
        """Convert one provider search result into a ``ResultModel``."""
        d = _search_result_to_dict(sr)

        title = d.get("title") or ""
        path = d.get("path") or None

        # Honour an explicit extension when the item carries one; otherwise
        # derive it from the path.
        explicit_ext = d.get("ext")
        ext = str(explicit_ext).lstrip(".") if explicit_ext else _extension_of(path)

        size = d.get("size_bytes")
        try:
            size_bytes = int(size) if size else None
        except (TypeError, ValueError):
            # A malformed size must not abort conversion of the whole batch.
            size_bytes = None

        metadata = d.get("full_metadata") or d.get("metadata") or {}
        return ResultModel(
            title=str(title),
            path=str(path) if path is not None else None,
            ext=str(ext) if ext else None,
            size_bytes=size_bytes,
            metadata=metadata,
            source="vimm",
        )

    def _adapter(items):
        """Lazily convert provider items into ``ResultModel`` rows."""
        for it in items:
            yield _convert_search_result_to_model(it)

    def _columns_factory(rows):
        """Choose table columns based on what the given rows actually contain."""
        # Materialize once: the rows are scanned several times below, which
        # would silently see nothing on later passes if `rows` is an iterator.
        rows = list(rows or [])

        cols = [title_column()]
        if any(getattr(r, "ext", None) for r in rows):
            cols.append(ext_column())
        if any(getattr(r, "size_bytes", None) for r in rows):
            cols.append(metadata_column("size", "Size"))

        extra_keys = []
        for r in rows:
            meta = getattr(r, "metadata", None) or {}
            if not isinstance(meta, dict):
                continue
            for k in meta:
                if k in _HIDDEN_METADATA_KEYS or k in extra_keys:
                    continue
                extra_keys.append(k)
                if len(extra_keys) >= _MAX_EXTRA_COLUMNS:
                    break
            if len(extra_keys) >= _MAX_EXTRA_COLUMNS:
                break

        cols.extend(metadata_column(k) for k in extra_keys)
        return cols

    def _selection_fn(row):
        """Return the selector arguments for ``row``: by path when set, else by title."""
        if getattr(row, "path", None):
            return ["-path", row.path]
        return ["-title", row.title or ""]

    SAMPLE_ITEMS = [
        {"title": "Room of Awe", "path": "sample/Room of Awe", "ext": "zip", "size_bytes": 1024 * 1024 * 12, "full_metadata": {"platform": "PC"}},
        {"title": "Song of Joy", "path": "sample/Song of Joy.mp3", "ext": "mp3", "size_bytes": 5120000, "full_metadata": {"platform": "PC"}},
        {"title": "Cover Image", "path": "sample/Cover.jpg", "ext": "jpg", "size_bytes": 20480, "full_metadata": {}},
    ]

    try:
        register_provider(
            "vimm",
            _adapter,
            columns=_columns_factory,
            selection_fn=_selection_fn,
            metadata={"description": "Vimm provider bridge (ProviderCore -> ResultTable API)"},
        )
    except Exception as exc:
        # Non-fatal: registration is best-effort
        debug(f"[vimm] ResultTable registration failed: {exc}")
except Exception as exc:
    # Non-fatal: the ResultTable API is optional for this provider.
    debug(f"[vimm] ResultTable bridge unavailable: {exc}")