"""Vimm provider skeleton (lxml + HTTPClient). This is a lightweight, resilient provider implementation intended as a starting point for implementing a full Vimm (vimm.net) provider. It prefers server-rendered HTML parsing via lxml and uses the repo's `HTTPClient` helper for robust HTTP calls (timeouts/retries). Selectors in `search()` are intentionally permissive heuristics; update the XPaths to match the real site HTML when you have an actual fixture. """ from __future__ import annotations import re import sys from typing import Any, Dict, List, Optional from urllib.parse import urljoin, quote_plus from lxml import html as lxml_html from API.HTTP import HTTPClient from ProviderCore.base import Provider, SearchResult from SYS.logger import log, debug class Vimm(Provider): """Provider for vimm.net vault listings (skeleton). - Uses lxml for parsing - No authentication required """ URL = ("https://vimm.net/vault/",) URL_DOMAINS = ("vimm.net",) def validate(self) -> bool: # This provider has no required config; consider more checks if needed. return True def _parse_size_bytes(self, size_str: str) -> Optional[int]: if not size_str: return None try: s = str(size_str or "").strip().replace(",", "") m = re.search(r"(?P[\d\.]+)\s*(?P[KMGT]?B)?", s, flags=re.I) if not m: return None val = float(m.group("val")) unit = (m.group("unit") or "B").upper() mul = { "B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3, "TB": 1024 ** 4, }.get(unit, 1) return int(val * mul) except Exception: return None def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> List[SearchResult]: q = (query or "").strip() if not q: return [] # Build search/list URL base = "https://vimm.net/vault/" url = f"{base}?p=list&q={quote_plus(q)}" try: with HTTPClient(timeout=20.0) as client: resp = client.get(url) content = resp.content except Exception as exc: # Log and return empty results on failure. The HTTP client will # already attempt a certifi-based retry in common certificate # verification failure cases; if you still see cert errors, install # the `certifi` package or configure SSL_CERT_FILE to point at a # valid CA bundle. log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr) return [] try: doc = lxml_html.fromstring(content) except Exception as exc: log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr) return [] results: List[SearchResult] = [] # Candidate XPaths for list items (tweak to match real DOM) container_xpaths = [ '//div[contains(@class,"list-item")]', '//div[contains(@class,"result")]', '//li[contains(@class,"item")]', '//tr[contains(@class,"result")]', '//article', ] nodes = [] for xp in container_xpaths: try: found = doc.xpath(xp) if found: nodes = found debug(f"[vimm] using xpath {xp} -> {len(found)} nodes") break except Exception: continue # Fallback: try generic anchors under a list area if not nodes: try: nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div') except Exception: nodes = [] for n in (nodes or [])[: max(1, int(limit))]: try: # Prefer explicit title anchors title = None href = None try: # a few heuristic searches for a meaningful anchor a = (n.xpath('.//a[contains(@class,"title")]') or n.xpath('.//h2/a') or n.xpath('.//a[contains(@href,"/vault/")]') or n.xpath('.//a')) if a: a0 = a[0] title = a0.text_content().strip() href = a0.get('href') except Exception: title = None href = None if not title: title = (n.text_content() or "").strip() path = urljoin(base, href) if href else "" # Extract size & platform heuristics size_text = "" try: s = n.xpath('.//*[contains(@class,"size")]/text()') or n.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()') if s: size_text = str(s[0]).strip() except Exception: size_text = "" size_bytes = self._parse_size_bytes(size_text) platform = "" try: p = n.xpath('.//*[contains(@class,"platform")]/text()') if p: platform = str(p[0]).strip() except Exception: platform = "" columns = [] if platform: columns.append(("Platform", platform)) if size_text: columns.append(("Size", size_text)) results.append( SearchResult( table="vimm", title=str(title or "").strip(), path=str(path or ""), detail="", annotations=[], media_kind="file", size_bytes=size_bytes, tag={"vimm"}, columns=columns, full_metadata={"raw": lxml_html.tostring(n, encoding="unicode")}, ) ) except Exception: continue return results[: max(0, int(limit))] # Bridge into the ResultTable provider registry so vimm results can be rendered # with the new provider/table/select API. try: from SYS.result_table_adapters import register_provider from SYS.result_table_api import ResultModel from SYS.result_table_api import title_column, ext_column, metadata_column def _convert_search_result_to_model(sr): try: if hasattr(sr, "to_dict"): d = sr.to_dict() elif isinstance(sr, dict): d = sr else: d = { "title": getattr(sr, "title", str(sr)), "path": getattr(sr, "path", None), "size_bytes": getattr(sr, "size_bytes", None), "columns": getattr(sr, "columns", None), "full_metadata": getattr(sr, "full_metadata", None), } except Exception: d = {"title": getattr(sr, "title", str(sr))} title = d.get("title") or "" path = d.get("path") or None size = d.get("size_bytes") or None ext = None try: if path: from pathlib import Path suf = Path(str(path)).suffix if suf: ext = suf.lstrip(".") except Exception: ext = None metadata = d.get("full_metadata") or d.get("metadata") or {} return ResultModel( title=str(title), path=str(path) if path is not None else None, ext=str(ext) if ext is not None else None, size_bytes=int(size) if size is not None else None, metadata=metadata or {}, source="vimm", ) def _adapter(items): for it in items: yield _convert_search_result_to_model(it) def _columns_factory(rows): cols = [title_column()] if any(getattr(r, "ext", None) for r in rows): cols.append(ext_column()) if any(getattr(r, "size_bytes", None) for r in rows): cols.append(metadata_column("size", "Size")) # Add up to 2 discovered metadata keys from rows seen = [] for r in rows: for k in (r.metadata or {}).keys(): if k in ("name", "title", "path"): continue if k not in seen: seen.append(k) if len(seen) >= 2: break if len(seen) >= 2: break for k in seen: cols.append(metadata_column(k)) return cols def _selection_fn(row): if getattr(row, "path", None): return ["-path", row.path] return ["-title", row.title or ""] SAMPLE_ITEMS = [ {"title": "Room of Awe", "path": "sample/Room of Awe", "ext": "zip", "size_bytes": 1024 * 1024 * 12, "full_metadata": {"platform": "PC"}}, {"title": "Song of Joy", "path": "sample/Song of Joy.mp3", "ext": "mp3", "size_bytes": 5120000, "full_metadata": {"platform": "PC"}}, {"title": "Cover Image", "path": "sample/Cover.jpg", "ext": "jpg", "size_bytes": 20480, "full_metadata": {}}, ] try: register_provider( "vimm", _adapter, columns=_columns_factory, selection_fn=_selection_fn, metadata={"description": "Vimm provider bridge (ProviderCore -> ResultTable API)"}, ) except Exception: # Non-fatal: registration is best-effort pass except Exception: pass