"""Vimm provider skeleton (lxml + HTTPClient). This is a lightweight, resilient provider implementation intended as a starting point for implementing a full Vimm (vimm.net) provider. It prefers server-rendered HTML parsing via lxml and uses the repo's `HTTPClient` helper for robust HTTP calls (timeouts/retries). Selectors in `search()` are intentionally permissive heuristics; update the XPaths to match the real site HTML when you have an actual fixture. """ from __future__ import annotations import re import sys from typing import Any, Dict, List, Optional from urllib.parse import urljoin, quote_plus from lxml import html as lxml_html from API.HTTP import HTTPClient from ProviderCore.base import Provider, SearchResult from SYS.logger import log, debug class Vimm(Provider): """Provider for vimm.net vault listings (skeleton). - Uses lxml for parsing - No authentication required """ URL = ("https://vimm.net/vault/",) URL_DOMAINS = ("vimm.net",) def validate(self) -> bool: # This provider has no required config; consider more checks if needed. return True def _parse_size_bytes(self, size_str: str) -> Optional[int]: if not size_str: return None try: s = str(size_str or "").strip().replace(",", "") m = re.search(r"(?P[\d\.]+)\s*(?P[KMGT]?B)?", s, flags=re.I) if not m: return None val = float(m.group("val")) unit = (m.group("unit") or "B").upper() mul = { "B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3, "TB": 1024 ** 4, }.get(unit, 1) return int(val * mul) except Exception: return None def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> List[SearchResult]: q = (query or "").strip() if not q: return [] # Build search/list URL base = "https://vimm.net/vault/" url = f"{base}?p=list&q={quote_plus(q)}" try: with HTTPClient(timeout=20.0) as client: resp = client.get(url) content = resp.content except Exception as exc: log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr) return [] try: doc = lxml_html.fromstring(content) except Exception as exc: log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr) return [] results: List[SearchResult] = [] # Candidate XPaths for list items (tweak to match real DOM) container_xpaths = [ '//div[contains(@class,"list-item")]', '//div[contains(@class,"result")]', '//li[contains(@class,"item")]', '//tr[contains(@class,"result")]', '//article', ] nodes = [] for xp in container_xpaths: try: found = doc.xpath(xp) if found: nodes = found debug(f"[vimm] using xpath {xp} -> {len(found)} nodes") break except Exception: continue # Fallback: try generic anchors under a list area if not nodes: try: nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div') except Exception: nodes = [] for n in (nodes or [])[: max(1, int(limit))]: try: # Prefer explicit title anchors title = None href = None try: # a few heuristic searches for a meaningful anchor a = (n.xpath('.//a[contains(@class,"title")]') or n.xpath('.//h2/a') or n.xpath('.//a[contains(@href,"/vault/")]') or n.xpath('.//a')) if a: a0 = a[0] title = a0.text_content().strip() href = a0.get('href') except Exception: title = None href = None if not title: title = (n.text_content() or "").strip() path = urljoin(base, href) if href else "" # Extract size & platform heuristics size_text = "" try: s = n.xpath('.//*[contains(@class,"size")]/text()') or n.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()') if s: size_text = str(s[0]).strip() except Exception: size_text = "" size_bytes = self._parse_size_bytes(size_text) platform = "" try: p = n.xpath('.//*[contains(@class,"platform")]/text()') if p: platform = str(p[0]).strip() except Exception: platform = "" columns = [] if platform: columns.append(("Platform", platform)) if size_text: columns.append(("Size", size_text)) results.append( SearchResult( table="vimm", title=str(title or "").strip(), path=str(path or ""), detail="", annotations=[], media_kind="file", size_bytes=size_bytes, tag={"vimm"}, columns=columns, full_metadata={"raw": lxml_html.tostring(n, encoding="unicode")}, ) ) except Exception: continue return results[: max(0, int(limit))]