df
This commit is contained in:
185
Provider/vimm.py
Normal file
185
Provider/vimm.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""Vimm provider skeleton (lxml + HTTPClient).
|
||||
|
||||
This is a lightweight, resilient provider implementation intended as a
|
||||
starting point for implementing a full Vimm (vimm.net) provider.
|
||||
|
||||
It prefers server-rendered HTML parsing via lxml and uses the repo's
|
||||
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
|
||||
|
||||
Selectors in `search()` are intentionally permissive heuristics; update the
|
||||
XPaths to match the real site HTML when you have an actual fixture.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urljoin, quote_plus
|
||||
from lxml import html as lxml_html
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
|
||||
|
||||
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    The XPath selectors used here are permissive heuristics rather than a
    description of the real site markup; adjust the ``_*_XPATHS`` tuples once
    an actual HTML fixture is available.
    """

    # Single source of truth for the vault root; also used to resolve
    # relative links found in listings.
    _BASE_URL = "https://vimm.net/vault/"

    URL = (_BASE_URL,)
    URL_DOMAINS = ("vimm.net",)

    # Binary multipliers keyed by the unit's prefix letter ("K" for KB/KiB/K).
    _SIZE_MULTIPLIERS = {
        "K": 1024,
        "M": 1024 ** 2,
        "G": 1024 ** 3,
        "T": 1024 ** 4,
    }

    # A number followed by an optional unit (B, KB, KiB, K, ...). The negative
    # lookahead stops a word such as "Games" being read as the unit "G".
    _SIZE_RE = re.compile(
        r"(?P<val>\d+(?:\.\d+)?|\.\d+)\s*(?P<unit>[KMGT]i?B?|B)?(?![A-Za-z])",
        flags=re.I,
    )

    # Candidate XPaths for list items, tried in order (tweak to match real DOM).
    _CONTAINER_XPATHS = (
        '//div[contains(@class,"list-item")]',
        '//div[contains(@class,"result")]',
        '//li[contains(@class,"item")]',
        '//tr[contains(@class,"result")]',
        '//article',
    )

    # Fallback: generic children of a list/results area.
    _FALLBACK_XPATHS = (
        '//div[contains(@id,"list")]/div',
        '//div[contains(@class,"results")]/div',
    )

    # Heuristics for locating the anchor that carries an entry's title/link,
    # from most to least specific.
    _ANCHOR_XPATHS = (
        './/a[contains(@class,"title")]',
        './/h2/a',
        './/a[contains(@href,"/vault/")]',
        './/a',
    )

    _SIZE_TEXT_XPATHS = (
        './/*[contains(@class,"size")]/text()',
        './/span[contains(text(),"MB") or contains(text(),"GB")]/text()',
    )

    _PLATFORM_TEXT_XPATHS = (
        './/*[contains(@class,"platform")]/text()',
    )

    def validate(self) -> bool:
        """Return True; this provider has no required configuration."""
        # Consider more checks here if configuration is ever introduced.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human-readable size such as ``"1.5 GB"`` to bytes.

        Thousands separators (``,``) are removed before parsing. Units are
        case-insensitive and interpreted as binary multiples (1 KB = 1024 B);
        ``KB``, ``KiB`` and a bare ``K`` are treated alike. When the text
        contains several numbers, the first one that carries a unit is used,
        otherwise the first number is taken as a plain byte count.

        Args:
            size_str: Text containing a size; non-string values are coerced
                with ``str()``.

        Returns:
            The size in whole bytes (truncated), or ``None`` when the input is
            empty or contains no usable number.
        """
        if not size_str:
            return None

        text = str(size_str).strip().replace(",", "")

        chosen = None
        for match in self._SIZE_RE.finditer(text):
            if match.group("unit"):
                # A number with an explicit unit beats any bare number.
                chosen = match
                break
            if chosen is None:
                chosen = match
        if chosen is None:
            return None

        unit = chosen.group("unit") or ""
        multiplier = self._SIZE_MULTIPLIERS.get(unit[:1].upper(), 1)
        try:
            return int(float(chosen.group("val")) * multiplier)
        except (ValueError, OverflowError):
            # e.g. an absurdly long digit run that overflows to infinity.
            return None

    def _xpath(self, node: Any, expression: str) -> List[Any]:
        """Evaluate *expression* on *node*, returning ``[]`` on any failure."""
        try:
            found = node.xpath(expression)
        except Exception as exc:
            # Best-effort scraping: a failing selector must not abort the
            # whole search, but the failure should stay visible when debugging.
            debug(f"[vimm] xpath {expression} failed: {exc}")
            return []
        return found if isinstance(found, list) else []

    def _first_text(self, node: Any, *expressions: str) -> str:
        """Return the first non-blank text yielded by *expressions*, else ``""``."""
        for expression in expressions:
            for chunk in self._xpath(node, expression):
                text = str(chunk).strip()
                if text:
                    return text
        return ""

    def _find_result_nodes(self, doc: Any) -> List[Any]:
        """Return the nodes matched by the first selector that yields any."""
        for expression in self._CONTAINER_XPATHS:
            found = self._xpath(doc, expression)
            if found:
                debug(f"[vimm] using xpath {expression} -> {len(found)} nodes")
                return found

        for expression in self._FALLBACK_XPATHS:
            found = self._xpath(doc, expression)
            if found:
                return found

        return []

    def _node_to_result(self, node: Any) -> Optional[SearchResult]:
        """Build a ``SearchResult`` from one listing node.

        Returns ``None`` when the node yields neither a title nor a link.
        """
        anchor = None
        for expression in self._ANCHOR_XPATHS:
            hits = self._xpath(node, expression)
            if hits:
                anchor = hits[0]
                break

        title = ""
        href = None
        if anchor is not None:
            title = anchor.text_content() or ""
            href = (anchor.get("href") or "").strip() or None
        if not title.strip():
            # No usable anchor text: fall back to the node's own text.
            title = node.text_content() or ""
        # Collapse newlines/indentation carried over from the markup.
        title = " ".join(title.split())

        path = urljoin(self._BASE_URL, href) if href else ""
        if not title and not path:
            return None

        size_text = self._first_text(node, *self._SIZE_TEXT_XPATHS)
        platform = self._first_text(node, *self._PLATFORM_TEXT_XPATHS)

        columns = []
        if platform:
            columns.append(("Platform", platform))
        if size_text:
            columns.append(("Size", size_text))

        return SearchResult(
            table="vimm",
            title=title,
            path=path,
            detail="",
            annotations=[],
            media_kind="file",
            size_bytes=self._parse_size_bytes(size_text),
            tag={"vimm"},
            columns=columns,
            full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing and return up to *limit* results.

        Args:
            query: Free-text search string; blank queries return ``[]``.
            limit: Maximum number of results; values <= 0 return ``[]``
                without issuing a request.
            filters: Accepted for interface compatibility; currently unused.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            A list of ``SearchResult`` objects. HTTP and HTML-parse failures
            are logged to stderr and produce an empty list rather than raising.
        """
        q = (query or "").strip()
        if not q:
            return []

        max_results = max(0, int(limit))
        if max_results == 0:
            return []

        # Build search/list URL
        url = f"{self._BASE_URL}?p=list&q={quote_plus(q)}"

        try:
            with HTTPClient(timeout=20.0) as client:
                content = client.get(url).content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []

        try:
            doc = lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []

        results: List[SearchResult] = []
        for node in self._find_result_nodes(doc):
            # Count produced results, not inspected nodes, so that nodes
            # which fail to parse do not eat into the caller's limit.
            if len(results) >= max_results:
                break
            try:
                result = self._node_to_result(node)
            except Exception as exc:
                debug(f"[vimm] skipping unparsable node: {exc}")
                continue
            if result is not None:
                results.append(result)

        return results
|
||||
Reference in New Issue
Block a user