refactor(download): remove ProviderCore/download.py, move sanitize_filename to SYS.utils, replace callers to use API.HTTP.HTTPClient

This commit is contained in:
2026-01-06 01:38:59 -08:00
parent 3b363dd536
commit 41c11d39fd
38 changed files with 2640 additions and 526 deletions

302
SYS/html_table.py Normal file
View File

@@ -0,0 +1,302 @@
"""Small helper utilities for extracting structured records from HTML tables
using lxml.
Goal: make it trivial for provider authors to extract table rows and common
fields (title, link, standardized column keys) without re-implementing the
same heuristics in every provider.
Key functions:
- find_candidate_nodes(doc_or_html, xpaths=...)
- extract_records(doc_or_html, base_url=None, xpaths=...)
- normalize_header(name, synonyms=...)
This module intentionally avoids heavyweight deps (no pandas) and works with
`lxml.html` elements (the project already uses lxml).
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
from lxml import html as lxml_html
from urllib.parse import urljoin
import re
# Default xpaths for candidate result containers
# Ordered by priority: proper table rows first, then common div/li "card"
# layouts. find_candidate_nodes() returns the first expression that matches.
_DEFAULT_XPATHS = [
    "//table//tbody/tr",
    "//table//tr[td]",
    "//div[contains(@class,'list-item')]",
    "//div[contains(@class,'result')]",
    "//li[contains(@class,'item')]",
]
# Simple header synonyms (you can extend as needed)
# Keys are already-normalized header text (lowercase, whitespace -> "_");
# values are the canonical record keys used across providers.
_DEFAULT_SYNONYMS = {
    "platform": "system",
    "system": "system",
    "name": "title",
    "title": "title",
}
def _ensure_doc(doc_or_html: Any) -> lxml_html.HtmlElement:
if isinstance(doc_or_html, str):
return lxml_html.fromstring(doc_or_html)
return doc_or_html
def _text_or_img_title(el) -> str:
    """Best-effort display text for a cell.

    An ``img/@title`` attribute wins when present (flag/platform icons carry
    their label there); otherwise fall back to the element's text content.
    """
    try:
        titles = el.xpath('.//img/@title')
    except Exception:
        titles = None
    if titles:
        return str(titles[0]).strip()
    return (el.text_content() or "").strip()
def find_candidate_nodes(doc_or_html: Any, xpaths: Optional[List[str]] = None) -> Tuple[List[Any], Optional[str]]:
    """Locate candidate result nodes using a prioritized xpath list.

    Tries each expression in *xpaths* (or the module defaults) in order and
    stops at the first one that yields any nodes.  Broken expressions are
    skipped silently.  Returns ``(nodes, chosen_xpath)``; ``([], None)`` when
    nothing matched.
    """
    doc = _ensure_doc(doc_or_html)
    for expr in (xpaths or _DEFAULT_XPATHS):
        try:
            hits = doc.xpath(expr)
        except Exception:
            # Malformed xpath or incompatible node type — try the next one.
            continue
        if hits:
            return list(hits), expr
    return [], None
def _parse_tr_nodes(tr_nodes: List[Any], base: Optional[str] = None) -> List[Dict[str, str]]:
    """Parse ``<tr>`` result rows into normalized record dicts.

    Two table layouts are recognized heuristically:

    - detail/download tables: the first cell carries the file anchor
      (title + href) and a later cell carries the size;
    - listing tables (Vimm-style): first cell is the platform, second the
      title/link, then region / version / languages columns.

    *base*, when given, is used to absolutize relative hrefs.  Rows that
    fail to parse are skipped instead of raising.
    """
    out: List[Dict[str, str]] = []
    for tr in tr_nodes:
        try:
            tds = tr.xpath("./td")
            if not tds or len(tds) < 1:
                continue
            # canonical fields
            rec: Dict[str, str] = {}
            # Heuristic: if the first cell contains an anchor, treat it as the title/path
            # (detail pages often put the file link in the first column and size in the second).
            a0 = tds[0].xpath('.//a[contains(@href,"/vault/")]') or tds[0].xpath('.//a')
            if a0:
                rec["title"] = (a0[0].text_content() or "").strip()
                href = a0[0].get("href")
                rec["path"] = urljoin(base, href) if href and base else (href or "")
                # Try to find a size cell in the remaining tds (class 'size' is common)
                size_val = None
                for td in tds[1:]:
                    s = td.xpath('.//span[contains(@class,"size")]/text()')
                    if s:
                        size_val = str(s[0]).strip()
                        break
                if not size_val and len(tds) > 1:
                    txt = (tds[1].text_content() or "").strip()
                    # crude size heuristic: contains digits and a unit letter
                    if txt and re.search(r"\d", txt):
                        size_val = txt
                if size_val:
                    rec["size"] = size_val
            else:
                # First cell often "system"/"platform"
                rec["platform"] = _text_or_img_title(tds[0])
                # Title + optional link from second column
                if len(tds) > 1:
                    a = tds[1].xpath('.//a[contains(@href,"/vault/")]') or tds[1].xpath('.//a')
                    if a:
                        rec["title"] = (a[0].text_content() or "").strip()
                        href = a[0].get("href")
                        rec["path"] = urljoin(base, href) if href and base else (href or "")
                    else:
                        rec["title"] = (tds[1].text_content() or "").strip()
                # Additional columns in common Vimm layout
                if len(tds) > 2:
                    rec["region"] = _text_or_img_title(tds[2]).strip()
                if len(tds) > 3:
                    rec["version"] = (tds[3].text_content() or "").strip()
                if len(tds) > 4:
                    rec["languages"] = (tds[4].text_content() or "").strip()
            out.append(rec)
        except Exception:
            # One malformed row must not abort the whole parse.
            continue
    return out
def _parse_list_item_nodes(nodes: List[Any], base: Optional[str] = None) -> List[Dict[str, str]]:
    """Parse div/li "card"-style result nodes into record dicts.

    Title comes from the first ``h2 > a`` (or any anchor, or the node text);
    platform/size are pulled from ``span`` elements by class name.  *base*
    absolutizes relative hrefs.  Unparseable nodes are skipped.
    """
    records: List[Dict[str, str]] = []
    for node in nodes:
        try:
            rec: Dict[str, str] = {}
            anchors = node.xpath('.//h2/a') or node.xpath('.//a')
            if anchors:
                link = anchors[0]
                rec["title"] = (link.text_content() or "").strip()
                href = link.get("href")
                if href and base:
                    rec["path"] = urljoin(base, href)
                else:
                    rec["path"] = href or ""
            else:
                rec["title"] = (node.text_content() or "").strip()
            # Optional platform / size badges.
            platform_vals = node.xpath('.//span[contains(@class,"platform")]/text()')
            if platform_vals:
                rec["platform"] = str(platform_vals[0]).strip()
            size_vals = node.xpath('.//span[contains(@class,"size")]/text()')
            if size_vals:
                rec["size"] = str(size_vals[0]).strip()
            records.append(rec)
        except Exception:
            continue
    return records
def normalize_header(name: str, synonyms: Optional[Dict[str, str]] = None) -> str:
    """Normalize a column header to its canonical key.

    Lowercases, trims, and replaces internal whitespace runs with ``_``,
    then applies the synonym map (defaults map e.g. 'platform' -> 'system'
    and 'name' -> 'title').  Empty/falsy input yields ``""``.
    """
    if not name:
        return ""
    key = re.sub(r"\s+", "_", str(name).strip().lower())
    mapping = synonyms or _DEFAULT_SYNONYMS
    return mapping.get(key) or key
def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Optional[List[str]] = None, use_pandas_if_available: bool = True) -> Tuple[List[Dict[str, str]], Optional[str]]:
    """Find result candidate nodes and return a list of normalized records plus chosen xpath.

    If pandas is available and `use_pandas_if_available` is True, attempt to parse
    HTML tables using `pandas.read_html` and return those records. Falls back to
    node-based parsing when pandas is not available or fails. Returns (records, chosen)
    where `chosen` is the xpath that matched or the string 'pandas' when the
    pandas path was used.
    """
    # Prepare an HTML string for pandas if needed
    html_text: Optional[str] = None
    if isinstance(doc_or_html, (bytes, bytearray)):
        try:
            html_text = doc_or_html.decode("utf-8")
        except Exception:
            # Lossy latin-1 decode rather than failing outright on bad bytes.
            html_text = doc_or_html.decode("latin-1", errors="ignore")
    elif isinstance(doc_or_html, str):
        html_text = doc_or_html
    else:
        # Presumably an already-parsed lxml element; re-serialize for pandas.
        try:
            html_text = lxml_html.tostring(doc_or_html, encoding="unicode")
        except Exception:
            html_text = str(doc_or_html)
    # Try pandas first when available and requested
    if use_pandas_if_available and html_text is not None:
        try:
            import pandas as _pd  # type: ignore
            dfs = _pd.read_html(html_text)
            if dfs:
                # pick the largest dataframe by row count for heuristics
                df = max(dfs, key=lambda d: getattr(d, "shape", (len(getattr(d, 'index', [])), 0))[0])
                try:
                    rows = df.to_dict("records")
                except Exception:
                    # Some DataFrame-like objects may have slightly different APIs
                    rows = [dict(r) for r in df]
                records: List[Dict[str, str]] = []
                for row in rows:
                    nr: Dict[str, str] = {}
                    for k, v in (row or {}).items():
                        nk = normalize_header(str(k or ""))
                        nr[nk] = (str(v).strip() if v is not None else "")
                    records.append(nr)
                # Attempt to recover hrefs by matching anchor text -> href
                # (pandas strips markup, so links must be re-associated by text).
                try:
                    doc = lxml_html.fromstring(html_text)
                    anchors = {}
                    for a in doc.xpath('//a'):
                        txt = (a.text_content() or "").strip()
                        href = a.get("href")
                        if txt and href and txt not in anchors:
                            anchors[txt] = href
                    for rec in records:
                        if not rec.get("path") and rec.get("title"):
                            href = anchors.get(rec["title"])
                            if href:
                                rec["path"] = urljoin(base_url, href) if base_url else href
                except Exception:
                    pass
                return records, "pandas"
        except Exception:
            # Pandas not present or parsing failed; fall back to node parsing
            pass
    # Fallback to node-based parsing
    nodes, chosen = find_candidate_nodes(doc_or_html, xpaths=xpaths)
    if not nodes:
        return [], chosen
    # Determine node type and parse accordingly
    first = nodes[0]
    tag = getattr(first, "tag", "").lower()
    if tag == "tr":
        records = _parse_tr_nodes(nodes, base=base_url)
    else:
        # list-item style
        records = _parse_list_item_nodes(nodes, base=base_url)
    # Normalize keys (map platform->system etc)
    normed: List[Dict[str, str]] = []
    for r in records:
        nr: Dict[str, str] = {}
        for k, v in (r or {}).items():
            nk = normalize_header(k)
            nr[nk] = v
        normed.append(nr)
    return normed, chosen
# Small convenience: convert records to SearchResult. Providers can call this or
# use their own mapping when they need full SearchResult objects.
from ProviderCore.base import SearchResult # local import to avoid circular issues
def records_to_search_results(records: List[Dict[str, str]], table: str = "provider") -> List[SearchResult]:
    """Convert normalized record dicts into ``SearchResult`` rows.

    Title falls back from ``title`` to ``name``; every non-empty record
    entry becomes a display column (key title-cased).  The raw record is
    preserved under ``full_metadata``.
    """
    results: List[SearchResult] = []
    for rec in records:
        display_title = rec.get("title") or rec.get("name") or ""
        link = rec.get("path") or ""
        columns = [(key.title(), value) for key, value in rec.items() if key and value]
        results.append(
            SearchResult(
                table=table,
                title=str(display_title),
                path=str(link),
                detail="",
                annotations=[],
                media_kind="file",
                size_bytes=None,
                tag={table},
                columns=columns,
                full_metadata={"raw_record": rec, "raw": rec},
            )
        )
    return results

View File

@@ -972,6 +972,16 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
return None
def get_last_result_table_row_selection_action(row_index: int) -> Optional[List[str]]:
    """Get the expanded stage tokens for a row in the last result table."""
    state = _get_pipeline_state()
    table = state.last_result_table
    # Guard clauses: table must be selectable and expose rows in range.
    if not _is_selectable_table(table) or not hasattr(table, "rows"):
        return None
    if not (0 <= row_index < len(table.rows)):
        return None
    row = table.rows[row_index]
    return row.selection_action if hasattr(row, "selection_action") else None
def set_current_stage_table(result_table: Optional[Any]) -> None:
"""Store the current pipeline stage table for @N expansion.
@@ -1035,6 +1045,17 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[
return None
def get_current_stage_table_row_selection_action(row_index: int) -> Optional[List[str]]:
    """Get the expanded stage tokens for a row in the current stage table."""
    state = _get_pipeline_state()
    table = state.current_stage_table
    # Guard clauses: table must be selectable and expose rows in range.
    if not _is_selectable_table(table) or not hasattr(table, "rows"):
        return None
    if not (0 <= row_index < len(table.rows)):
        return None
    row = table.rows[row_index]
    return row.selection_action if hasattr(row, "selection_action") else None
def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]:
"""Get the original source index for a row in the current stage table.

110
SYS/provider_helpers.py Normal file
View File

@@ -0,0 +1,110 @@
"""Convenience mixins and helpers for table-based providers.
Provides a small `TableProviderMixin` that handles HTTP fetch + table extraction
(using `SYS.html_table.extract_records`) and converts records into
`ProviderCore.base.SearchResult` rows with sane default column ordering.
Providers can subclass this mixin to implement search quickly:
class MyProvider(TableProviderMixin, Provider):
URL = ("https://example.org/search",)
def search(self, query, limit=50, **kwargs):
url = f"{self.URL[0]}?q={quote_plus(query)}"
return self.search_table_from_url(url, limit=limit, xpaths=self.DEFAULT_XPATHS)
The mixin deliberately avoids adding heavy dependencies (uses our lxml helper)
so authors don't have to install pandas/bs4 unless they want to.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from urllib.parse import quote_plus
from API.HTTP import HTTPClient
from ProviderCore.base import SearchResult
from SYS.html_table import extract_records
import lxml.html as lxml_html
class TableProviderMixin:
    """Mixin to simplify providers that scrape table/list results from HTML.

    Methods:
    - search_table_from_url(url, limit, xpaths): fetches HTML, extracts records, returns SearchResults
    - DEFAULT_XPATHS: default xpath list used when none is provided
    """

    # Mirrors the default candidate xpaths used by SYS.html_table.
    DEFAULT_XPATHS: List[str] = [
        "//table//tbody/tr",
        "//table//tr[td]",
        "//div[contains(@class,'list-item')]",
        "//div[contains(@class,'result')]",
        "//li[contains(@class,'item')]",
    ]

    def search_table_from_url(self, url: str, limit: int = 50, xpaths: Optional[List[str]] = None, timeout: float = 15.0) -> List[SearchResult]:
        """Fetch `url`, extract table/list records, and return SearchResult list.

        `xpaths` is passed to `extract_records` (falls back to DEFAULT_XPATHS).
        Network or parse failures yield an empty list rather than raising.
        """
        if not url:
            return []
        try:
            with HTTPClient(timeout=timeout) as client:
                payload = client.get(url).content
        except Exception:
            return []
        # httpx hands back bytes; prefer a parsed lxml document, degrading
        # to a decoded (or stringified) fallback when parsing fails.
        try:
            document = lxml_html.fromstring(payload)
        except Exception:
            try:
                document = payload.decode("utf-8")
            except Exception:
                document = str(payload)
        records, _chosen = extract_records(document, base_url=url, xpaths=xpaths or self.DEFAULT_XPATHS)
        raw_name = getattr(self, "name", "provider")
        results: List[SearchResult] = []
        for rec in (records or [])[: int(limit)]:
            title = rec.get("title") or ""
            link = rec.get("path") or ""
            # Optional display columns, emitted in a fixed order when present.
            optional = (
                ("Platform", rec.get("system") or rec.get("platform") or ""),
                ("Size", rec.get("size") or ""),
                ("Region", rec.get("region") or ""),
                ("Version", rec.get("version") or ""),
                ("Languages", rec.get("languages") or ""),
            )
            cols = [("Title", title)]
            cols.extend((header, value) for header, value in optional if value)
            results.append(
                SearchResult(
                    table=(raw_name or "provider"),
                    title=title,
                    path=link,
                    detail="",
                    annotations=[],
                    media_kind="file",
                    size_bytes=None,
                    tag={raw_name},
                    columns=cols,
                    full_metadata={"raw_record": rec},
                )
            )
        return results

View File

@@ -359,6 +359,8 @@ class ResultRow:
columns: List[ResultColumn] = field(default_factory=list)
selection_args: Optional[List[str]] = None
"""Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])"""
selection_action: Optional[List[str]] = None
"""Full expanded stage tokens that should run when this row is selected."""
source_index: Optional[int] = None
"""Original insertion order index (used to map sorted views back to source items)."""
payload: Optional[Any] = None
@@ -648,6 +650,11 @@ class ResultTable:
if 0 <= row_index < len(self.rows):
self.rows[row_index].selection_args = selection_args
def set_row_selection_action(self, row_index: int, selection_action: List[str]) -> None:
    """Attach the full stage-token list to run when this row is picked via @N.

    Out-of-range indices are ignored silently.
    """
    if not (0 <= row_index < len(self.rows)):
        return
    self.rows[row_index].selection_action = selection_action
def set_header_lines(self, lines: List[str]) -> "ResultTable":
"""Attach metadata lines that render beneath the title."""
self.header_lines = [line for line in lines if line]
@@ -827,6 +834,30 @@ class ResultTable:
if hasattr(result, "annotations") and result.annotations:
row.add_column("Annotations", ", ".join(str(a) for a in result.annotations))
try:
md = getattr(result, "full_metadata", None)
md_dict = dict(md) if isinstance(md, dict) else {}
except Exception:
md_dict = {}
try:
selection_args = getattr(result, "selection_args", None)
except Exception:
selection_args = None
if selection_args is None:
selection_args = md_dict.get("_selection_args") or md_dict.get("selection_args")
if selection_args:
row.selection_args = [str(a) for a in selection_args if a is not None]
try:
selection_action = getattr(result, "selection_action", None)
except Exception:
selection_action = None
if selection_action is None:
selection_action = md_dict.get("_selection_action") or md_dict.get("selection_action")
if selection_action:
row.selection_action = [str(a) for a in selection_action if a is not None]
def _add_result_item(self, row: ResultRow, item: Any) -> None:
"""Extract and add ResultItem fields to row (compact display for search results).

View File

@@ -10,10 +10,10 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from SYS.result_table_api import ColumnSpec, ProviderAdapter, ResultModel
from SYS.result_table_api import ColumnSpec, ProviderAdapter, ResultModel, ResultTable, ensure_result_model
ColumnFactory = Callable[[Iterable[ResultModel]], List[ColumnSpec]]
ColumnFactory = Callable[[List[ResultModel]], List[ColumnSpec]]
SelectionFn = Callable[[ResultModel], List[str]]
@@ -22,33 +22,57 @@ class Provider:
name: str
adapter: ProviderAdapter
# columns can be a static list or a factory that derives columns from sample rows
columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None
selection_fn: Optional[SelectionFn] = None
columns: Union[List[ColumnSpec], ColumnFactory]
selection_fn: SelectionFn
metadata: Optional[Dict[str, Any]] = None
def get_columns(self, rows: Optional[Iterable[ResultModel]] = None) -> List[ColumnSpec]:
if self.columns is None:
raise ValueError(f"provider '{self.name}' must define columns")
if callable(self.columns):
try:
rows_list = list(rows) if rows is not None else []
return list(self.columns(rows_list))
except Exception:
# Fall back to a minimal Title column on errors
return [ColumnSpec("title", "Title", lambda r: r.title)]
if self.columns is not None:
return list(self.columns)
# Default minimal column set
return [ColumnSpec("title", "Title", lambda r: r.title)]
rows_list = list(rows) if rows is not None else []
cols = list(self.columns(rows_list))
else:
cols = list(self.columns)
if not cols:
raise ValueError(f"provider '{self.name}' produced no columns")
return cols
def selection_args(self, row: ResultModel) -> List[str]:
if callable(self.selection_fn):
try:
return list(self.selection_fn(row))
except Exception:
return []
# Default selector: prefer path flag, then title
if getattr(row, "path", None):
return ["-path", str(row.path)]
return ["-title", str(row.title)]
if not callable(self.selection_fn):
raise ValueError(f"provider '{self.name}' must define a selection function")
sel = list(self.selection_fn(ensure_result_model(row)))
return sel
def build_table(self, items: Iterable[Any]) -> ResultTable:
"""Materialize adapter output into a ResultTable (strict, no legacy types)."""
try:
rows = [ensure_result_model(r) for r in self.adapter(items)]
except Exception as exc:
raise RuntimeError(f"provider '{self.name}' adapter failed") from exc
cols = self.get_columns(rows)
return ResultTable(provider=self.name, rows=rows, columns=cols, meta=self.metadata or {})
def serialize_row(self, row: ResultModel) -> Dict[str, Any]:
r = ensure_result_model(row)
return {
"title": r.title,
"path": r.path,
"ext": r.ext,
"size_bytes": r.size_bytes,
"metadata": r.metadata or {},
"source": r.source or self.name,
"_selection_args": self.selection_args(r),
}
def serialize_rows(self, rows: Iterable[ResultModel]) -> List[Dict[str, Any]]:
return [self.serialize_row(r) for r in rows]
_PROVIDERS: Dict[str, Provider] = {}
@@ -58,8 +82,8 @@ def register_provider(
name: str,
adapter: ProviderAdapter,
*,
columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None,
selection_fn: Optional[SelectionFn] = None,
columns: Union[List[ColumnSpec], ColumnFactory],
selection_fn: SelectionFn,
metadata: Optional[Dict[str, Any]] = None,
) -> Provider:
name = str(name or "").strip().lower()
@@ -67,13 +91,20 @@ def register_provider(
raise ValueError("provider name required")
if name in _PROVIDERS:
raise ValueError(f"provider already registered: {name}")
if columns is None:
raise ValueError("provider registration requires columns")
if selection_fn is None:
raise ValueError("provider registration requires selection_fn")
p = Provider(name=name, adapter=adapter, columns=columns, selection_fn=selection_fn, metadata=metadata)
_PROVIDERS[name] = p
return p
def get_provider(name: str) -> Provider:
return _PROVIDERS[name.lower()]
normalized = str(name or "").lower()
if normalized not in _PROVIDERS:
raise KeyError(f"provider not registered: {name}")
return _PROVIDERS[normalized]
def list_providers() -> List[str]:

View File

@@ -7,7 +7,7 @@ renderers must use. It intentionally refuses to accept legacy dicts/strings/objs
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Iterable, Optional, Protocol
from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol
@dataclass(frozen=True)
@@ -33,6 +33,48 @@ class ResultModel:
source: Optional[str] = None
@dataclass(frozen=True)
class ResultTable:
    """Concrete, provider-owned table of rows/columns.

    This is intentionally minimal: it only stores rows, column specs, and
    optional metadata used by renderers. It does not auto-normalize legacy
    objects or infer columns.
    """

    provider: str
    rows: List[ResultModel]
    columns: List[ColumnSpec]
    meta: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Validate and defensively copy inputs.  object.__setattr__ is needed
        # because the dataclass is frozen.
        if not str(self.provider or "").strip():
            raise ValueError("provider required for ResultTable")
        object.__setattr__(self, "rows", [ensure_result_model(r) for r in self.rows])
        if not self.columns:
            raise ValueError("columns are required for ResultTable")
        object.__setattr__(self, "columns", list(self.columns))
        object.__setattr__(self, "meta", dict(self.meta or {}))

    def serialize_row(self, row: ResultModel, selection: Optional[List[str]] = None) -> Dict[str, Any]:
        """Convert a row into pipeline-friendly dict (with selection args).

        Selection args must be precomputed by the provider; this method only
        copies them into the serialized dict.
        """
        r = ensure_result_model(row)
        return {
            "title": r.title,
            "path": r.path,
            "ext": r.ext,
            "size_bytes": r.size_bytes,
            "metadata": r.metadata or {},
            "source": r.source or self.provider,
            # Copied verbatim so @N selection can replay the provider's args.
            "_selection_args": list(selection or []),
        }
@dataclass(frozen=True)
class ColumnSpec:
"""Specification for a column that renderers will use.
@@ -100,6 +142,7 @@ def metadata_column(key: str, header: Optional[str] = None, format_fn: Optional[
__all__ = [
"ResultModel",
"ResultTable",
"ColumnSpec",
"ProviderAdapter",
"Renderer",

View File

@@ -9,7 +9,7 @@ from __future__ import annotations
from typing import Any, Dict, Iterable, Optional
from SYS.result_table_api import ColumnSpec, ResultModel, Renderer
from SYS.result_table_api import ColumnSpec, ResultModel, ResultTable, Renderer
class RichRenderer(Renderer):
@@ -65,3 +65,22 @@ def render_to_console(rows: Iterable[ResultModel], columns: Iterable[ColumnSpec]
table = RichRenderer().render(rows, columns, meta)
Console().print(table)
def render_result_table(table: ResultTable, renderer: Optional[Renderer] = None) -> Any:
    """Render a ResultTable with the provided renderer (RichRenderer by default)."""
    rows, cols, meta = table.rows, table.columns, table.meta
    return (renderer or RichRenderer()).render(rows, cols, meta)
def render_result_table_to_console(table: ResultTable, renderer: Optional[Renderer] = None) -> None:
    """Print *table* to the console — rich output when available, plain text otherwise."""
    try:
        from rich.console import Console
    except Exception:
        # rich is unavailable: emit one space-joined line per row.
        for row in table.rows:
            cells = [str(col.extractor(row) or "") for col in table.columns]
            print(" ".join(cells))
        return
    Console().print(render_result_table(table, renderer))

View File

@@ -66,6 +66,24 @@ def sanitize_metadata_value(value: Any) -> str | None:
return value
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
    """Return a filesystem-safe filename derived from *name*.

    Replaces characters that are invalid on Windows (``<>:"/\\|?*`` and
    non-whitespace control characters) with underscores, collapses runs of
    whitespace to single spaces, strips leading/trailing periods, and caps
    the length at *max_len*.  The result is re-trimmed after truncation so
    it never ends in a space or period (both rejected by Windows), and
    ``"download"`` is returned whenever the result would be empty.
    """
    text = str(name or "").strip()
    if not text:
        return "download"
    forbidden = set('<>:"/\\|?*')
    # Control characters (other than whitespace, which is collapsed below)
    # are also invalid in Windows filenames.
    cleaned = "".join(
        "_" if (c in forbidden or (ord(c) < 32 and not c.isspace())) else c
        for c in text
    )
    cleaned = " ".join(cleaned.split()).strip().strip(".")
    # Truncate, then trim again: the cut can land right after a space/period.
    cleaned = cleaned[:max_len].rstrip(" .")
    return cleaned or "download"
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []