186 lines
6.0 KiB
Python
186 lines
6.0 KiB
Python
"""Vimm provider skeleton (lxml + HTTPClient).

This is a lightweight, resilient provider implementation intended as a
starting point for implementing a full Vimm (vimm.net) provider.

It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).

Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
from typing import Any, Dict, List, Optional
|
|
from urllib.parse import urljoin, quote_plus
|
|
from lxml import html as lxml_html
|
|
|
|
from API.HTTP import HTTPClient
|
|
from ProviderCore.base import Provider, SearchResult
|
|
from SYS.logger import log, debug
|
|
|
|
|
|
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    The XPath selectors used to locate result rows are permissive heuristics
    and should be tightened once a real HTML fixture is available.
    """

    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    # Binary multipliers keyed by unit prefix letter ("" means plain bytes).
    _SIZE_MULTIPLIERS = {
        "": 1,
        "K": 1024,
        "M": 1024 ** 2,
        "G": 1024 ** 3,
        "T": 1024 ** 4,
    }
    # A number immediately followed by a byte unit: "1.4 GB", "700MiB", "12 B".
    _SIZE_WITH_UNIT_RE = re.compile(
        r"(?P<val>\d+(?:\.\d*)?|\.\d+)\s*(?P<prefix>[KMGT]?)i?B",
        flags=re.I,
    )
    # Fallback: a bare number, interpreted as a byte count.
    _BARE_NUMBER_RE = re.compile(r"\d+(?:\.\d*)?|\.\d+")

    # Candidate XPaths for list items, tried in order (tweak to match real DOM).
    _CONTAINER_XPATHS = (
        '//div[contains(@class,"list-item")]',
        '//div[contains(@class,"result")]',
        '//li[contains(@class,"item")]',
        '//tr[contains(@class,"result")]',
        '//article',
    )
    # Anchor heuristics inside one result container, most specific first.
    _ANCHOR_XPATHS = (
        './/a[contains(@class,"title")]',
        './/h2/a',
        './/a[contains(@href,"/vault/")]',
        './/a',
    )

    def validate(self) -> bool:
        """Return True; this provider has no required configuration."""
        # Consider more checks here if configuration is ever needed.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human-readable size such as ``"1.4 GB"`` to bytes.

        Thousands separators (``,``) are ignored. A number that is directly
        followed by a unit (``B``, ``KB``/``KiB``, ``MB``/``MiB``, ``GB``/``GiB``,
        ``TB``/``TiB``, case-insensitive) is preferred over an earlier bare
        number, so ``"2 discs, 1.4 GB"`` yields the 1.4 GB value. When no unit
        is present the first number is taken as a byte count. Multipliers are
        powers of 1024.

        Args:
            size_str: Text that may contain a size.

        Returns:
            The size in whole bytes (truncated), or None when the text is
            empty or contains no usable number.
        """
        if not size_str:
            return None

        text = str(size_str).strip().replace(",", "")

        with_unit = self._SIZE_WITH_UNIT_RE.search(text)
        if with_unit:
            number = with_unit.group("val")
            prefix = with_unit.group("prefix").upper()
        else:
            bare = self._BARE_NUMBER_RE.search(text)
            if not bare:
                return None
            number = bare.group(0)
            prefix = ""

        try:
            return int(float(number) * self._SIZE_MULTIPLIERS[prefix])
        except (ValueError, OverflowError):
            # Absurdly long digit runs overflow to inf; treat as unknown size.
            return None

    def _fetch_document(self, url: str) -> Optional[Any]:
        """Download *url* and parse it with lxml.

        Returns the parsed root element, or None (after logging to stderr)
        when either the HTTP request or the HTML parse fails.
        """
        # The concrete exception types of HTTPClient/lxml are not visible
        # here, so this I/O boundary deliberately catches broadly and logs.
        try:
            with HTTPClient(timeout=20.0) as client:
                content = client.get(url).content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return None

        try:
            return lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return None

    def _find_result_nodes(self, doc: Any) -> List[Any]:
        """Return the container elements that look like search results.

        The first candidate XPath that matches anything wins; if none match,
        the children of a "list"/"results" div are used as a fallback.
        """
        for xp in self._CONTAINER_XPATHS:
            try:
                found = doc.xpath(xp)
            except Exception:
                continue
            if found:
                debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
                return list(found)

        # Fallback: try generic children under a list area.
        try:
            fallback = (
                doc.xpath('//div[contains(@id,"list")]/div')
                or doc.xpath('//div[contains(@class,"results")]/div')
            )
        except Exception:
            return []
        return list(fallback or [])

    @staticmethod
    def _first_text(node: Any, *xpaths: str) -> str:
        """Return the stripped first hit of the first XPath that matches, else ""."""
        for xp in xpaths:
            try:
                hits = node.xpath(xp)
            except Exception:
                continue
            if hits:
                return str(hits[0]).strip()
        return ""

    def _node_to_result(self, node: Any, base: str) -> Optional[SearchResult]:
        """Build a SearchResult from one container element.

        Returns None when the node carries neither text nor a link, since
        such a row would be blank and unusable.
        """
        title = ""
        href = None
        for xp in self._ANCHOR_XPATHS:
            try:
                anchors = node.xpath(xp)
            except Exception:
                continue
            if anchors:
                title = anchors[0].text_content() or ""
                href = anchors[0].get("href")
                break

        if not title.strip():
            title = node.text_content() or ""
        # Collapse newlines/indentation picked up from nested markup.
        title = " ".join(title.split())

        if not title and not href:
            return None

        # Relative links are resolved against the vault base URL.
        path = urljoin(base, href) if href else ""

        size_text = self._first_text(
            node,
            './/*[contains(@class,"size")]/text()',
            './/span[contains(text(),"MB") or contains(text(),"GB")]/text()',
        )
        platform = self._first_text(node, './/*[contains(@class,"platform")]/text()')

        columns = []
        if platform:
            columns.append(("Platform", platform))
        if size_text:
            columns.append(("Size", size_text))

        return SearchResult(
            table="vimm",
            title=title,
            path=str(path or ""),
            detail="",
            annotations=[],
            media_kind="file",
            size_bytes=self._parse_size_bytes(size_text),
            tag={"vimm"},
            columns=columns,
            full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing and return up to *limit* results.

        Args:
            query: Search text; blank or None yields an empty list.
            limit: Maximum number of results. Values <= 0 yield an empty list
                without any network access; a non-numeric value falls back
                to 50.
            filters: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A list of SearchResult objects. Network and parse failures are
            logged and produce an empty list rather than an exception.
        """
        q = (query or "").strip()
        if not q:
            return []

        try:
            max_results = int(limit)
        except (TypeError, ValueError):
            max_results = 50
        if max_results <= 0:
            return []

        # Build search/list URL
        base = self.URL[0]
        url = f"{base}?p=list&q={quote_plus(q)}"

        doc = self._fetch_document(url)
        if doc is None:
            return []

        results: List[SearchResult] = []
        for node in self._find_result_nodes(doc):
            # Count produced results, not visited nodes, so that skipped
            # nodes do not eat into the caller's limit.
            if len(results) >= max_results:
                break
            try:
                item = self._node_to_result(node, base)
            except Exception as exc:
                # Best effort: one malformed row must not abort the search.
                debug(f"[vimm] skipping unparseable result node: {exc}")
                continue
            if item is not None:
                results.append(item)

        return results
|