df

2026-01-05 07:51:19 -08:00
parent 8545367e28
commit 1f765cffda
32 changed files with 3447 additions and 3250 deletions
--- a/Provider/vimm.py
+++ b/Provider/vimm.py
@@ -0,0 +1,185 @@
+"""Vimm provider skeleton (lxml + HTTPClient).
+
+This is a lightweight, resilient provider implementation intended as a
+starting point for implementing a full Vimm (vimm.net) provider.
+
+It prefers server-rendered HTML parsing via lxml and uses the repo's
+`HTTPClient` helper for robust HTTP calls (timeouts/retries).
+
+Selectors in `search()` are intentionally permissive heuristics; update the
+XPaths to match the real site HTML when you have an actual fixture.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from typing import Any, Dict, List, Optional
+from urllib.parse import urljoin, quote_plus
+from lxml import html as lxml_html
+
+from API.HTTP import HTTPClient
+from ProviderCore.base import Provider, SearchResult
+from SYS.logger import log, debug
+
+
+class Vimm(Provider):
+    """Provider for vimm.net vault listings (skeleton).
+
+    - Uses lxml for parsing
+    - No authentication required
+
+    The XPath tables below are permissive guesses rather than selectors
+    verified against the live site; adjust them once a real HTML fixture is
+    available.
+    """
+
+    URL = ("https://vimm.net/vault/",)
+    URL_DOMAINS = ("vimm.net",)
+
+    # A number with an optional unit. The optional "i" accepts the binary
+    # spellings (KiB, MiB, ...) in addition to KB, MB, ...
+    _SIZE_RE = re.compile(
+        r"(?P<val>\d+(?:\.\d*)?|\.\d+)\s*(?P<unit>[KMGT]?i?B)?", re.I
+    )
+    _UNIT_MULTIPLIERS = {
+        "B": 1,
+        "KB": 1024,
+        "MB": 1024 ** 2,
+        "GB": 1024 ** 3,
+        "TB": 1024 ** 4,
+    }
+
+    # Candidate XPaths for result containers, tried in order; first hit wins.
+    _CONTAINER_XPATHS = (
+        '//div[contains(@class,"list-item")]',
+        '//div[contains(@class,"result")]',
+        '//li[contains(@class,"item")]',
+        '//tr[contains(@class,"result")]',
+        '//article',
+    )
+    # Tried only when none of the container XPaths matches anything.
+    _FALLBACK_XPATHS = (
+        '//div[contains(@id,"list")]/div',
+        '//div[contains(@class,"results")]/div',
+    )
+    # Per-result lookups, most specific first.
+    _ANCHOR_XPATHS = (
+        './/a[contains(@class,"title")]',
+        './/h2/a',
+        './/a[contains(@href,"/vault/")]',
+        './/a',
+    )
+    _SIZE_XPATHS = (
+        './/*[contains(@class,"size")]/text()',
+        './/span[contains(text(),"MB") or contains(text(),"GB")]/text()',
+    )
+    _PLATFORM_XPATHS = ('.//*[contains(@class,"platform")]/text()',)
+
+    def validate(self) -> bool:
+        """Report whether the provider is usable; it has no required config."""
+        return True
+
+    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
+        """Convert a human-readable size such as ``"1.5 MB"`` to bytes.
+
+        Units are binary multiples (1 KB = 1024 bytes) and ``KiB``-style
+        spellings are treated as synonyms. Commas are dropped before parsing.
+        When the text holds several numbers, the first one that carries a
+        unit wins, so ``"Disc 2 - 700 MB"`` yields 700 MB rather than 2 bytes.
+
+        Args:
+            size_str: Text containing a size; may be empty or ``None``.
+
+        Returns:
+            The size in whole bytes (fraction truncated), or ``None`` when
+            the text is empty or contains no number.
+        """
+        if not size_str:
+            return None
+        text = str(size_str).strip().replace(",", "")
+        matches = list(self._SIZE_RE.finditer(text))
+        if not matches:
+            return None
+        # Prefer a number that has a unit; fall back to the first bare number.
+        best = next((m for m in matches if m.group("unit")), matches[0])
+        # "MIB" -> "MB": both spellings share one multiplier table.
+        unit = (best.group("unit") or "B").upper().replace("I", "")
+        try:
+            value = float(best.group("val"))
+            return int(value * self._UNIT_MULTIPLIERS.get(unit, 1))
+        except (ValueError, OverflowError):
+            # OverflowError: an absurdly long digit run parses to infinity.
+            return None
+
+    def search(
+        self,
+        query: str,
+        limit: int = 50,
+        filters: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> List[SearchResult]:
+        """Search the vault listing page and return the parsed results.
+
+        Args:
+            query: Search text; blank or ``None`` yields no results.
+            limit: Maximum number of result nodes to process. Values that
+                cannot be converted to ``int`` fall back to 50; zero or
+                negative values yield no results without any HTTP request.
+            filters: Currently ignored.
+            **kwargs: Currently ignored.
+
+        Returns:
+            A list of ``SearchResult`` objects, empty when the fetch or the
+            HTML parse fails (both failures are logged to stderr).
+        """
+        q = (query or "").strip()
+        if not q:
+            return []
+
+        try:
+            max_results = int(limit)
+        except (TypeError, ValueError):
+            max_results = 50
+        if max_results <= 0:
+            return []
+
+        # Build search/list URL
+        base = self.URL[0]
+        url = f"{base}?p=list&q={quote_plus(q)}"
+
+        try:
+            with HTTPClient(timeout=20.0) as client:
+                resp = client.get(url)
+                content = resp.content
+        except Exception as exc:
+            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
+            return []
+
+        try:
+            doc = lxml_html.fromstring(content)
+        except Exception as exc:
+            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
+            return []
+
+        results: List[SearchResult] = []
+        for node in self._find_result_nodes(doc)[:max_results]:
+            try:
+                item = self._node_to_result(node, base)
+            except Exception as exc:
+                # Best effort: one malformed node must not lose the others.
+                debug(f"[vimm] skipping unparsable result node: {exc}")
+                continue
+            if item is not None:
+                results.append(item)
+        return results
+
+    def _find_result_nodes(self, doc: Any) -> List[Any]:
+        """Return the nodes matched by the first XPath that finds anything."""
+        for xp in self._CONTAINER_XPATHS + self._FALLBACK_XPATHS:
+            try:
+                found = doc.xpath(xp)
+            except Exception as exc:
+                debug(f"[vimm] xpath {xp} failed: {exc}")
+                continue
+            if found:
+                debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
+                return list(found)
+        return []
+
+    @staticmethod
+    def _first_text(node: Any, xpaths: Any) -> str:
+        """Return the stripped first hit of the first XPath that matches."""
+        for xp in xpaths:
+            hits = node.xpath(xp)
+            if hits:
+                return str(hits[0]).strip()
+        return ""
+
+    def _node_to_result(self, node: Any, base: str) -> Optional[SearchResult]:
+        """Build a ``SearchResult`` from one listing node.
+
+        Returns ``None`` when the node yields neither a title nor a link.
+        """
+        title = ""
+        href = None
+        for xp in self._ANCHOR_XPATHS:
+            anchors = node.xpath(xp)
+            if anchors:
+                title = anchors[0].text_content() or ""
+                href = anchors[0].get("href")
+                break
+
+        # Collapse newlines and indentation from the markup into single spaces.
+        title = " ".join(title.split())
+        if not title:
+            title = " ".join((node.text_content() or "").split())
+
+        path = urljoin(base, href) if href else ""
+        if not title and not path:
+            return None
+
+        # Extract size & platform heuristics
+        size_text = self._first_text(node, self._SIZE_XPATHS)
+        platform = self._first_text(node, self._PLATFORM_XPATHS)
+
+        columns = []
+        if platform:
+            columns.append(("Platform", platform))
+        if size_text:
+            columns.append(("Size", size_text))
+
+        return SearchResult(
+            table="vimm",
+            title=title,
+            path=path,
+            detail="",
+            annotations=[],
+            media_kind="file",
+            size_bytes=self._parse_size_bytes(size_text),
+            tag={"vimm"},
+            columns=columns,
+            full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
+        )