refactor(download): remove ProviderCore/download.py, move sanitize_filename to SYS.utils, replace callers to use API.HTTP.HTTPClient

This commit is contained in:
2026-01-06 01:38:59 -08:00
parent 3b363dd536
commit 41c11d39fd
38 changed files with 2640 additions and 526 deletions

View File

@@ -20,7 +20,7 @@ from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs from urllib.parse import unquote, urlparse, parse_qs
import logging import logging
from SYS.logger import debug, log from SYS.logger import debug, is_debug_enabled, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file from SYS.utils import ensure_directory, sha256_file
@@ -51,7 +51,10 @@ def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
return env_cert return env_cert
def _try_module_bundle(mod_name: str) -> Optional[str]: def _try_module_bundle(mod_name: str) -> Optional[str]:
# Prefer checking sys.modules first (helps test injection / monkeypatching)
try: try:
mod = sys.modules.get(mod_name)
if mod is None:
mod = __import__(mod_name) mod = __import__(mod_name)
except Exception: except Exception:
return None return None
@@ -178,6 +181,28 @@ class HTTPClient:
self._httpx_verify = _resolve_verify_value(verify_ssl) self._httpx_verify = _resolve_verify_value(verify_ssl)
# Debug helpers
def _debug_panel(self, title: str, rows: List[tuple[str, Any]]) -> None:
if not is_debug_enabled():
return
try:
from rich.table import Table
from rich.panel import Panel
grid = Table.grid(padding=(0, 1))
grid.add_column("Key", style="cyan", no_wrap=True)
grid.add_column("Value")
for key, val in rows:
try:
grid.add_row(str(key), str(val))
except Exception:
grid.add_row(str(key), "<unprintable>")
debug(Panel(grid, title=title, expand=False))
except Exception:
# Fallback to simple debug output
debug(title, rows)
def __enter__(self): def __enter__(self):
"""Context manager entry.""" """Context manager entry."""
self._client = httpx.Client( self._client = httpx.Client(
@@ -425,8 +450,33 @@ class HTTPClient:
last_exception = None last_exception = None
for attempt in range(self.retries): for attempt in range(self.retries):
self._debug_panel(
"HTTP request",
[
("method", method),
("url", url),
("attempt", f"{attempt + 1}/{self.retries}"),
("params", kwargs.get("params")),
("headers", kwargs.get("headers")),
("verify", self._httpx_verify),
("follow_redirects", kwargs.get("follow_redirects", False)),
],
)
try: try:
response = self._client.request(method, url, **kwargs) response = self._client.request(method, url, **kwargs)
self._debug_panel(
"HTTP response",
[
("method", method),
("url", url),
("status", getattr(response, "status_code", "")),
("elapsed", getattr(response, "elapsed", "")),
(
"content_length",
response.headers.get("content-length") if hasattr(response, "headers") else "",
),
],
)
if raise_for_status: if raise_for_status:
response.raise_for_status() response.raise_for_status()
return response return response
@@ -537,6 +587,16 @@ class HTTPClient:
else: else:
kwargs["headers"] = self._get_headers() kwargs["headers"] = self._get_headers()
self._debug_panel(
"HTTP stream",
[
("method", method),
("url", url),
("headers", kwargs.get("headers")),
("follow_redirects", kwargs.get("follow_redirects", False)),
],
)
return self._client.stream(method, url, **kwargs) return self._client.stream(method, url, **kwargs)

189
CLI.py
View File

@@ -67,6 +67,7 @@ from SYS.cmdlet_catalog import (
) )
from SYS.config import get_local_storage_path, load_config from SYS.config import get_local_storage_path, load_config
from SYS.result_table import ResultTable from SYS.result_table import ResultTable
from ProviderCore.registry import provider_inline_query_choices
HELP_EXAMPLE_SOURCE_COMMANDS = { HELP_EXAMPLE_SOURCE_COMMANDS = {
".help-example", ".help-example",
@@ -797,10 +798,10 @@ class CmdletIntrospection:
@staticmethod @staticmethod
def store_choices(config: Dict[str, Any]) -> List[str]: def store_choices(config: Dict[str, Any]) -> List[str]:
try: try:
from Store import Store # Use config-only helper to avoid instantiating backends during completion
from Store.registry import list_configured_backend_names
storage = Store(config=config, suppress_debug=True) return list(list_configured_backend_names(config) or [])
return list(storage.list_backends() or [])
except Exception: except Exception:
return [] return []
@@ -903,6 +904,21 @@ class CmdletCompleter(Completer):
return used return used
@staticmethod
def _flag_value(tokens: Sequence[str], *flags: str) -> Optional[str]:
want = {str(f).strip().lower() for f in flags if str(f).strip()}
if not want:
return None
for idx, tok in enumerate(tokens):
low = str(tok or "").strip().lower()
if "=" in low:
head, val = low.split("=", 1)
if head in want:
return tok.split("=", 1)[1]
if low in want and idx + 1 < len(tokens):
return tokens[idx + 1]
return None
def get_completions( def get_completions(
self, self,
document: Document, document: Document,
@@ -971,6 +987,48 @@ class CmdletCompleter(Completer):
prev_token = stage_tokens[-2].lower() if len(stage_tokens) > 1 else "" prev_token = stage_tokens[-2].lower() if len(stage_tokens) > 1 else ""
config = self._config_loader.load() config = self._config_loader.load()
provider_name = None
if cmd_name == "search-file":
provider_name = self._flag_value(stage_tokens, "-provider", "--provider")
if (
cmd_name == "search-file"
and provider_name
and not ends_with_space
and ":" in current_token
and not current_token.startswith("-")
):
# Allow quoted tokens like "system:g
quote_prefix = current_token[0] if current_token[:1] in {"'", '"'} else ""
inline_token = current_token[1:] if quote_prefix else current_token
if inline_token.endswith(quote_prefix) and len(inline_token) > 1:
inline_token = inline_token[:-1]
# Allow comma-separated inline specs; operate on the last segment only.
if "," in inline_token:
inline_token = inline_token.split(",")[-1].lstrip()
if ":" not in inline_token:
return
field, partial = inline_token.split(":", 1)
field = field.strip().lower()
partial_lower = partial.strip().lower()
inline_choices = provider_inline_query_choices(provider_name, field, config)
if inline_choices:
filtered = (
[c for c in inline_choices if partial_lower in str(c).lower()]
if partial_lower
else list(inline_choices)
)
for choice in (filtered or inline_choices):
# Replace only the partial after the colon; keep the field prefix and quotes as typed.
start_pos = -len(partial)
suggestion = str(choice)
yield Completion(suggestion, start_position=start_pos)
return
choices = CmdletIntrospection.arg_choices( choices = CmdletIntrospection.arg_choices(
cmd_name=cmd_name, cmd_name=cmd_name,
arg_name=prev_token, arg_name=prev_token,
@@ -2580,7 +2638,16 @@ class PipelineExecutor:
else: else:
cmd_list = [] cmd_list = []
expanded_stage: List[str] = cmd_list + source_args + selected_row_args # IMPORTANT: Put selected row args *before* source_args.
# Rationale: The cmdlet argument parser treats the *first* unknown
# token as a positional value (e.g., URL). If `source_args`
# contain unknown flags (like -provider which download-file does
# not declare), they could be misinterpreted as the positional
# URL argument and cause attempts to download strings like
# "-provider" (which is invalid). By placing selection args
# first we ensure the intended URL/selection token is parsed
# as the positional URL and avoid this class of parsing errors.
expanded_stage: List[str] = cmd_list + selected_row_args + source_args
if first_stage_had_extra_args and stages: if first_stage_had_extra_args and stages:
expanded_stage += stages[0] expanded_stage += stages[0]
@@ -2592,15 +2659,11 @@ class PipelineExecutor:
try: try:
worker_manager.log_step( worker_manager.log_step(
pipeline_session.worker_id, pipeline_session.worker_id,
f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}", f"@N expansion: {source_cmd} + selected_args={selected_row_args} + source_args={source_args}",
) )
except Exception: except Exception:
pass pass
selection_indices = []
command_expanded = True
if (not command_expanded) and selection_indices:
stage_table = None stage_table = None
try: try:
stage_table = ctx.get_current_stage_table() stage_table = ctx.get_current_stage_table()
@@ -2770,6 +2833,41 @@ class PipelineExecutor:
except Exception: except Exception:
auto_stage = None auto_stage = None
def _apply_row_action_to_stage(stage_idx: int) -> bool:
if not selection_indices or len(selection_indices) != 1:
return False
try:
row_action = ctx.get_current_stage_table_row_selection_action(
selection_indices[0]
)
except Exception:
row_action = None
if not row_action:
# Fallback to serialized payload when the table row is unavailable
try:
items = ctx.get_last_result_items() or []
if 0 <= selection_indices[0] < len(items):
maybe = items[selection_indices[0]]
if isinstance(maybe, dict):
candidate = maybe.get("_selection_action")
if isinstance(candidate, (list, tuple)):
row_action = [str(x) for x in candidate if x is not None]
debug(f"@N row {selection_indices[0]} restored action from payload: {row_action}")
except Exception:
row_action = row_action or None
if not row_action:
debug(f"@N row {selection_indices[0]} has no selection_action")
return False
normalized = [str(x) for x in row_action if x is not None]
if not normalized:
return False
debug(f"Applying row action for row {selection_indices[0]} -> {normalized}")
if 0 <= stage_idx < len(stages):
debug(f"Replacing stage {stage_idx} {stages[stage_idx]} with row action {normalized}")
stages[stage_idx] = normalized
return True
return False
if not stages: if not stages:
if isinstance(table_type, str) and table_type.startswith("metadata."): if isinstance(table_type, str) and table_type.startswith("metadata."):
print("Auto-applying metadata selection via get-tag") print("Auto-applying metadata selection via get-tag")
@@ -2779,7 +2877,43 @@ class PipelineExecutor:
print(f"Auto-running selection via {auto_stage[0]}") print(f"Auto-running selection via {auto_stage[0]}")
except Exception: except Exception:
pass pass
stages.append(list(auto_stage)) # Append the auto stage now. If the user also provided a selection
# (e.g., @1 | add-file ...), we want to attach the row selection
# args *to the auto-inserted stage* so the download command receives
# the selected row information immediately.
stages.append(list(auto_stage) + (source_args or []))
debug(f"Inserted auto stage before row action: {stages[-1]}")
# If the caller included a selection (e.g., @1) try to attach
# the selection args immediately to the inserted auto stage so
# the expansion is effective in a single pass.
if selection_indices:
try:
if not _apply_row_action_to_stage(len(stages) - 1):
# Only support single-row selection for auto-attach here
if len(selection_indices) == 1:
idx = selection_indices[0]
row_args = ctx.get_current_stage_table_row_selection_args(idx)
if not row_args:
try:
items = ctx.get_last_result_items() or []
if 0 <= idx < len(items):
maybe = items[idx]
if isinstance(maybe, dict):
candidate = maybe.get("_selection_args")
if isinstance(candidate, (list, tuple)):
row_args = [str(x) for x in candidate if x is not None]
except Exception:
row_args = row_args or None
if row_args:
# Place selection args before any existing source args
inserted = stages[-1]
if inserted:
cmd = inserted[0]
tail = [str(x) for x in inserted[1:]]
stages[-1] = [cmd] + [str(x) for x in row_args] + tail
except Exception:
pass
else: else:
first_cmd = stages[0][0] if stages and stages[0] else None first_cmd = stages[0][0] if stages and stages[0] else None
if isinstance(table_type, str) and table_type.startswith("metadata.") and first_cmd not in ( if isinstance(table_type, str) and table_type.startswith("metadata.") and first_cmd not in (
@@ -2795,8 +2929,41 @@ class PipelineExecutor:
auto_cmd_norm = _norm_cmd(auto_stage[0]) auto_cmd_norm = _norm_cmd(auto_stage[0])
if first_cmd_norm not in (auto_cmd_norm, ".pipe", ".mpv"): if first_cmd_norm not in (auto_cmd_norm, ".pipe", ".mpv"):
debug(f"Auto-inserting {auto_cmd_norm} after selection") debug(f"Auto-inserting {auto_cmd_norm} after selection")
stages.insert(0, list(auto_stage)) # Insert the auto stage before the user-specified stage
stages.insert(0, list(auto_stage) + (source_args or []))
debug(f"Inserted auto stage before existing pipeline: {stages[0]}")
# If a selection is present, attach the row selection args to the
# newly-inserted stage so the download stage runs with the
# selected row information.
if selection_indices:
try:
if not _apply_row_action_to_stage(0):
if len(selection_indices) == 1:
idx = selection_indices[0]
row_args = ctx.get_current_stage_table_row_selection_args(idx)
if not row_args:
try:
items = ctx.get_last_result_items() or []
if 0 <= idx < len(items):
maybe = items[idx]
if isinstance(maybe, dict):
candidate = maybe.get("_selection_args")
if isinstance(candidate, (list, tuple)):
row_args = [str(x) for x in candidate if x is not None]
except Exception:
row_args = row_args or None
if row_args:
inserted = stages[0]
if inserted:
cmd = inserted[0]
tail = [str(x) for x in inserted[1:]]
stages[0] = [cmd] + [str(x) for x in row_args] + tail
except Exception:
pass
# After inserting/appending an auto-stage, continue processing so later
# selection-expansion logic can still run (e.g., for example selectors).
return True, piped_result return True, piped_result
else: else:
print("No previous results to select from\n") print("No previous results to select from\n")

View File

@@ -11,7 +11,7 @@ from urllib.parse import urlparse
from API.HTTP import HTTPClient, _download_direct_file from API.HTTP import HTTPClient, _download_direct_file
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename from SYS.utils import sanitize_filename
from SYS.logger import log, debug from SYS.logger import log, debug
from SYS.models import DownloadError from SYS.models import DownloadError
@@ -495,7 +495,7 @@ def adjust_output_dir_for_alldebrid(
full_metadata: Optional[Dict[str, Any]], full_metadata: Optional[Dict[str, Any]],
item: Any, item: Any,
) -> Path: ) -> Path:
from ProviderCore.download import sanitize_filename as _sf from SYS.utils import sanitize_filename as _sf
output_dir = base_output_dir output_dir = base_output_dir
md = full_metadata if isinstance(full_metadata, dict) else {} md = full_metadata if isinstance(full_metadata, dict) else {}

View File

@@ -7,10 +7,7 @@ from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug from SYS.logger import log, debug
try: from tool.playwright import PlaywrightTool
from playwright.sync_api import sync_playwright
except ImportError: # pragma: no cover
sync_playwright = None
class Bandcamp(Provider): class Bandcamp(Provider):
@@ -137,8 +134,7 @@ class Bandcamp(Provider):
if not stage_is_last: if not stage_is_last:
return False return False
if sync_playwright is None: # Playwright is required; proceed to handle artist selection
return False
# Only handle artist selections. # Only handle artist selections.
chosen: List[Dict[str, Any]] = [] chosen: List[Dict[str, Any]] = []
@@ -219,11 +215,10 @@ class Bandcamp(Provider):
artist_url = chosen[0].get("url") or "" artist_url = chosen[0].get("url") or ""
try: try:
with sync_playwright() as p: tool = PlaywrightTool({})
browser = p.chromium.launch(headless=True) tool.require()
page = browser.new_page() with tool.open_page(headless=True) as page:
discography = self._scrape_artist_page(page, artist_url, limit=50) discography = self._scrape_artist_page(page, artist_url, limit=50)
browser.close()
except Exception as exc: except Exception as exc:
print(f"bandcamp artist lookup failed: {exc}\n") print(f"bandcamp artist lookup failed: {exc}\n")
return True return True
@@ -275,18 +270,10 @@ class Bandcamp(Provider):
Any]] = None, Any]] = None,
**kwargs: Any, **kwargs: Any,
) -> List[SearchResult]: ) -> List[SearchResult]:
if sync_playwright is None:
log(
"[bandcamp] Playwright not available. Install with: pip install playwright",
file=sys.stderr,
)
return []
try: try:
with sync_playwright() as p: tool = PlaywrightTool({})
browser = p.chromium.launch(headless=True) tool.require()
page = browser.new_page() with tool.open_page(headless=True) as page:
if query.strip().lower().startswith("artist:"): if query.strip().lower().startswith("artist:"):
artist_name = query[7:].strip().strip('"') artist_name = query[7:].strip().strip('"')
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b" search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
@@ -294,8 +281,6 @@ class Bandcamp(Provider):
search_url = f"https://bandcamp.com/search?q={query}&item_type=a" search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
results = self._scrape_url(page, search_url, limit) results = self._scrape_url(page, search_url, limit)
browser.close()
return results return results
except Exception as exc: except Exception as exc:
@@ -366,4 +351,5 @@ class Bandcamp(Provider):
return results return results
def validate(self) -> bool: def validate(self) -> bool:
return sync_playwright is not None # Playwright is required for the provider to function
return True

View File

@@ -10,7 +10,7 @@ from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse from urllib.parse import quote, urlparse
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename from SYS.utils import sanitize_filename
from SYS.logger import log from SYS.logger import log
# Helper for download-file: render selectable formats for a details URL. # Helper for download-file: render selectable formats for a details URL.

View File

@@ -11,7 +11,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import urljoin, urlparse, unquote from urllib.parse import urljoin, urlparse, unquote
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename from SYS.utils import sanitize_filename
from SYS.logger import log from SYS.logger import log
from SYS.models import ProgressBar from SYS.models import ProgressBar

View File

@@ -18,7 +18,7 @@ import requests
from API.HTTP import HTTPClient from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import download_file, sanitize_filename from SYS.utils import sanitize_filename
from SYS.cli_syntax import get_field, get_free_text, parse_query from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log from SYS.logger import debug, log
from SYS.utils import unique_path from SYS.utils import unique_path
@@ -1541,19 +1541,23 @@ class OpenLibrary(Provider):
except Exception: except Exception:
pass pass
out_path = unique_path(output_dir / f"{safe_title}.pdf") out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file( try:
with HTTPClient(timeout=30.0) as client:
path = client.download(
pdf_url, pdf_url,
out_path, str(out_path),
session=self._session, chunk_size=1024 * 256,
progress_callback=( progress_callback=(
( (lambda downloaded, total: progress_callback("bytes", downloaded, total, safe_title))
lambda downloaded, total, label: if progress_callback is not None
progress_callback("bytes", downloaded, total, label) else None
) if progress_callback is not None else None
), ),
) )
if ok: if path and path.exists():
return out_path return path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Direct download failed", file=sys.stderr) log("[openlibrary] Direct download failed", file=sys.stderr)
return None return None

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,7 @@
from __future__ import annotations from __future__ import annotations
import re
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
@@ -46,9 +48,51 @@ class SearchResult:
except Exception: except Exception:
pass pass
try:
selection_args = getattr(self, "selection_args", None)
except Exception:
selection_args = None
if selection_args is None:
try:
fm = getattr(self, "full_metadata", None)
if isinstance(fm, dict):
selection_args = fm.get("_selection_args") or fm.get("selection_args")
except Exception:
selection_args = None
if selection_args:
out["_selection_args"] = selection_args
return out return out
def parse_inline_query_arguments(raw_query: str) -> Tuple[str, Dict[str, str]]:
    """Split a provider search query into free text and inline key/value args.

    Tokens shaped like ``key:value`` (or ``key=value`` when no colon is
    present) are collected into a mapping with lowercased keys; every other
    token is re-joined into the residual free-text query.

    Returns a ``(free_text, args)`` tuple; both are empty for blank input.
    """
    text = str(raw_query or "").strip()
    if not text:
        return "", {}
    args: Dict[str, str] = {}
    remainder: List[str] = []
    for piece in re.split(r"[,\s]+", text):
        if not piece:
            continue
        # Prefer ":" as the separator; fall back to "=" only when there is
        # no colon anywhere in the token.
        cut = piece.find(":")
        if cut < 0:
            cut = piece.find("=")
        if cut > 0:
            key = piece[:cut].strip().lower()
            value = piece[cut + 1:].strip()
            if key and value:
                args[key] = value
                continue
        remainder.append(piece)
    return " ".join(remainder).strip(), args
class Provider(ABC): class Provider(ABC):
"""Unified provider base class. """Unified provider base class.
@@ -97,6 +141,12 @@ class Provider(ABC):
return [] return []
return out return out
def extract_query_arguments(self, query: str) -> Tuple[str, Dict[str, Any]]:
"""Allow providers to normalize query text and parse inline arguments."""
normalized = str(query or "").strip()
return normalized, {}
# Standard lifecycle/auth hook. # Standard lifecycle/auth hook.
def login(self, **_kwargs: Any) -> bool: def login(self, **_kwargs: Any) -> bool:
return True return True

View File

@@ -1,100 +0,0 @@
from __future__ import annotations
from pathlib import Path
from typing import Callable, Optional
import sys
import requests
from SYS.models import ProgressBar
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
    """Return *name* made safe for use as a filename.

    Replaces characters forbidden on common filesystems with underscores,
    collapses runs of whitespace, trims leading/trailing spaces and dots,
    falls back to ``"download"`` when nothing usable remains, and truncates
    the result to *max_len* characters.
    """
    raw = str(name or "").strip()
    if not raw:
        return "download"
    # Map every forbidden filesystem character to "_" in a single C-level pass.
    table = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    safe = " ".join(raw.translate(table).split()).strip().strip(".")
    if not safe:
        safe = "download"
    return safe[:max_len]
def download_file(
    url: str,
    output_path: Path,
    *,
    session: Optional[requests.Session] = None,
    timeout_s: float = 30.0,
    progress_callback: Optional[Callable[[int, Optional[int], str], None]] = None,
) -> bool:
    """Stream *url* to *output_path*, reporting progress as bytes arrive.

    Args:
        url: Source URL, fetched with a streaming GET.
        output_path: Destination file; parent directories are created.
        session: Optional shared ``requests.Session``; a fresh one is created
            when omitted.
        timeout_s: Per-request timeout passed to ``requests``.
        progress_callback: Optional ``(downloaded, total, label)`` hook; when
            omitted an internal ``ProgressBar`` renders to stderr instead.

    Returns:
        True when the file exists and is non-empty after the transfer;
        False on any failure (a partial file is deleted best-effort).
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    s = session or requests.Session()
    # Only render an internal bar when the caller did not supply a callback.
    bar = ProgressBar() if progress_callback is None else None
    downloaded = 0
    total = None
    try:
        with s.get(url, stream=True, timeout=timeout_s) as resp:
            resp.raise_for_status()
            # content-length may be absent or malformed; treat <= 0 as unknown.
            try:
                total_val = int(resp.headers.get("content-length") or 0)
                total = total_val if total_val > 0 else None
            except Exception:
                total = None
            label = str(output_path.name or "download")
            # Render once immediately so fast downloads still show something.
            try:
                if progress_callback is not None:
                    progress_callback(0, total, label)
                elif bar is not None:
                    bar.update(downloaded=0, total=total, label=label, file=sys.stderr)
            except Exception:
                pass
            with open(output_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1024 * 256):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        # Progress reporting is best-effort: a failing callback
                        # or renderer must never abort the transfer itself.
                        try:
                            if progress_callback is not None:
                                progress_callback(downloaded, total, label)
                            elif bar is not None:
                                bar.update(
                                    downloaded=downloaded,
                                    total=total,
                                    label=label,
                                    file=sys.stderr
                                )
                        except Exception:
                            pass
        try:
            if bar is not None:
                bar.finish()
        except Exception:
            pass
        # Success means a real, non-empty file landed on disk.
        return output_path.exists() and output_path.stat().st_size > 0
    except Exception:
        try:
            if bar is not None:
                bar.finish()
        except Exception:
            pass
        # Remove any partial file so callers never observe a truncated download.
        try:
            if output_path.exists():
                output_path.unlink()
        except Exception:
            pass
        return False

View File

@@ -0,0 +1,127 @@
"""Inline query helpers for providers (choice normalization and filter resolution)."""
from __future__ import annotations
from typing import Any, Dict, List, Optional
def _normalize_choice(entry: Any) -> Optional[Dict[str, Any]]:
if entry is None:
return None
if isinstance(entry, dict):
value = entry.get("value")
text = entry.get("text") or entry.get("label") or value
aliases = entry.get("alias") or entry.get("aliases") or []
value_str = str(value) if value is not None else (str(text) if text is not None else None)
text_str = str(text) if text is not None else value_str
if not value_str or not text_str:
return None
alias_list = [str(a) for a in aliases if a is not None]
return {"value": value_str, "text": text_str, "aliases": alias_list}
return {"value": str(entry), "text": str(entry), "aliases": []}
def collect_choice(provider: Any) -> Dict[str, List[Dict[str, Any]]]:
    """Collect normalized inline/query argument choice entries from a provider.

    Sources, in order: the ``QUERY_ARG_CHOICES`` mapping (preferred), the
    ``INLINE_QUERY_FIELD_CHOICES`` mapping as a fallback, and the optional
    ``inline_query_field_choices()`` method. Each choice is normalized to
    ``{"value", "text", "aliases"}`` via ``_normalize_choice``; fields with
    no usable entries are omitted.

    Returns:
        Mapping of lowercased field name -> list of normalized choice dicts.
    """
    mapping: Dict[str, List[Dict[str, Any]]] = {}

    def _ingest(source: Any, target_key: str) -> None:
        # Accept plain sequences, callables returning sequences, or dicts
        # wrapping the sequence under "choices"/"values".
        normalized: List[Dict[str, Any]] = []
        seq = source
        try:
            if callable(seq):
                seq = seq()
        except Exception:
            seq = source
        if isinstance(seq, dict):
            seq = seq.get("choices") or seq.get("values") or seq
        if isinstance(seq, (list, tuple, set)):
            for entry in seq:
                n = _normalize_choice(entry)
                if n:
                    normalized.append(n)
        if normalized:
            mapping[target_key] = normalized

    # Preferred class-level mapping; fall back to INLINE_QUERY_FIELD_CHOICES.
    # (The docstring always advertised that attribute, but it was previously
    # never read here.)
    base = getattr(provider, "QUERY_ARG_CHOICES", None)
    if not isinstance(base, dict):
        base = getattr(provider, "INLINE_QUERY_FIELD_CHOICES", None)
    if isinstance(base, dict):
        for k, v in base.items():
            key_norm = str(k).strip().lower()
            if key_norm:
                _ingest(v, key_norm)
    try:
        fn = getattr(provider, "inline_query_field_choices", None)
        if callable(fn):
            extra = fn()
            if isinstance(extra, dict):
                for k, v in extra.items():
                    key_norm = str(k).strip().lower()
                    if key_norm:
                        _ingest(v, key_norm)
    except Exception:
        pass
    return mapping
def resolve_filter(
    provider: Any,
    inline_args: Dict[str, Any],
    *,
    field_transforms: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
    """Map inline query args to provider filter values using declared choices.

    Each inline argument is matched case-insensitively against the provider's
    declared choice entries — by display text, canonical value, or alias —
    and replaced with the canonical value; unmatched values pass through
    unchanged. Optional per-field transforms (e.g. ``str.upper``) are applied
    last, best-effort. Returns normalized filters suitable for provider.search.
    """
    if not inline_args:
        return {}
    choice_map = collect_choice(provider)
    transforms = field_transforms or {}
    resolved_filters: Dict[str, str] = {}
    for arg_key, arg_val in inline_args.items():
        if arg_val is None:
            continue
        field = str(arg_key or "").strip().lower()
        wanted = str(arg_val).strip()
        if not field or not wanted:
            continue
        wanted_lower = wanted.lower()
        match: Optional[str] = None
        for entry in choice_map.get(field, []):
            display = str(entry.get("text") or "").strip()
            canonical = str(entry.get("value") or "").strip()
            alias_lowers = {
                str(a).strip().lower()
                for a in entry.get("aliases", [])
                if a is not None
            }
            if wanted_lower in (display.lower(), canonical.lower()) or wanted_lower in alias_lowers:
                match = canonical or display or wanted
                break
        if match is None:
            match = wanted
        fn = transforms.get(field)
        if callable(fn):
            try:
                match = fn(match)
            except Exception:
                pass
        if match:
            resolved_filters[field] = str(match)
    return resolved_filters

View File

@@ -89,7 +89,6 @@ class ProviderRegistry:
replace: bool = False, replace: bool = False,
) -> ProviderInfo: ) -> ProviderInfo:
"""Register a provider class with canonical and alias names.""" """Register a provider class with canonical and alias names."""
candidates = self._candidate_names(provider_class, override_name) candidates = self._candidate_names(provider_class, override_name)
if not candidates: if not candidates:
raise ValueError("provider name candidates are required") raise ValueError("provider name candidates are required")
@@ -397,6 +396,125 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
return None return None
def provider_inline_query_choices(
    provider_name: str,
    field_name: str,
    config: Optional[Dict[str, Any]] = None,
) -> List[str]:
    """Return provider-declared inline query choices for a field (e.g., system:GBA).

    Providers can expose a mapping via ``QUERY_ARG_CHOICES`` (preferred) or
    ``INLINE_QUERY_FIELD_CHOICES`` / ``inline_query_field_choices()``. This
    helper keeps completion logic simple and reusable.

    Returns display texts followed by their aliases, de-duplicated in
    first-seen order; an empty list on any failure.
    """
    pname = str(provider_name or "").strip().lower()
    field = str(field_name or "").strip().lower()
    if not pname or not field:
        return []
    provider = get_search_provider(pname, config)
    if provider is None:
        provider = get_provider(pname, config)
    if provider is None:
        return []

    def _normalize_choice_entry(entry: Any) -> Optional[Dict[str, Any]]:
        # Dicts may carry value/text/label/alias(es); primitives double as
        # both value and display text.
        if entry is None:
            return None
        if isinstance(entry, dict):
            value = entry.get("value")
            text = entry.get("text") or entry.get("label") or value
            aliases = entry.get("alias") or entry.get("aliases") or []
            value_str = str(value) if value is not None else (str(text) if text is not None else None)
            text_str = str(text) if text is not None else value_str
            if not value_str or not text_str:
                return None
            alias_list = [str(a) for a in aliases if a is not None]
            return {"value": value_str, "text": text_str, "aliases": alias_list}
        # string/other primitives
        return {"value": str(entry), "text": str(entry), "aliases": []}

    def _normalize_seq(source: Any) -> List[Dict[str, Any]]:
        # Shared ingestion for class-level mappings and method results
        # (this logic was previously duplicated inline twice).
        seq = source
        try:
            if callable(seq):
                seq = seq()
        except Exception:
            seq = source
        if isinstance(seq, dict):
            seq = seq.get("choices") or seq.get("values") or seq
        normalized: List[Dict[str, Any]] = []
        if isinstance(seq, (list, tuple, set)):
            for entry in seq:
                n = _normalize_choice_entry(entry)
                if n:
                    normalized.append(n)
        return normalized

    def _collect_mapping(p) -> Dict[str, List[Dict[str, Any]]]:
        # field name (lowercased) -> normalized choice entries
        mapping: Dict[str, List[Dict[str, Any]]] = {}
        base = getattr(p, "QUERY_ARG_CHOICES", None)
        if not isinstance(base, dict):
            base = getattr(p, "INLINE_QUERY_FIELD_CHOICES", None)
        if isinstance(base, dict):
            for k, v in base.items():
                normalized = _normalize_seq(v)
                if normalized:
                    mapping[str(k).strip().lower()] = normalized
        try:
            fn = getattr(p, "inline_query_field_choices", None)
            if callable(fn):
                extra = fn()
                if isinstance(extra, dict):
                    for k, v in extra.items():
                        normalized = _normalize_seq(v)
                        if normalized:
                            mapping[str(k).strip().lower()] = normalized
        except Exception:
            pass
        return mapping

    try:
        entries = _collect_mapping(provider).get(field, [])
        if not entries:
            return []
        # Emit each entry's display text first, then its aliases, skipping
        # anything already seen so completions stay unique and ordered.
        seen: set[str] = set()
        out: List[str] = []
        for entry in entries:
            text = entry.get("text") or entry.get("value")
            if not text:
                continue
            text_str = str(text)
            if text_str not in seen:
                seen.add(text_str)
                out.append(text_str)
            for alias in entry.get("aliases", []):
                alias_str = str(alias)
                if alias_str and alias_str not in seen:
                    seen.add(alias_str)
                    out.append(alias_str)
        return out
    except Exception:
        return []
def get_provider_for_url(url: str, def get_provider_for_url(url: str,
config: Optional[Dict[str, Any]] = None) -> Optional[Provider]: config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
name = match_provider_name_for_url(url) name = match_provider_name_for_url(url)
@@ -405,6 +523,60 @@ def get_provider_for_url(url: str,
return get_provider(name, config) return get_provider(name, config)
def resolve_inline_filters(
provider: Provider,
inline_args: Dict[str, Any],
*,
field_transforms: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
"""Map inline query args to provider filter values using declared choices.
- Uses provider's inline choice mapping (value/text/aliases) to resolve user text.
- Applies optional per-field transforms (e.g., str.upper).
- Returns normalized filters suitable for provider.search.
"""
filters: Dict[str, str] = {}
if not inline_args:
return filters
mapping = _collect_mapping(provider)
transforms = field_transforms or {}
for raw_key, raw_val in inline_args.items():
if raw_val is None:
continue
key = str(raw_key or "").strip().lower()
val_str = str(raw_val).strip()
if not key or not val_str:
continue
entries = mapping.get(key, [])
resolved: Optional[str] = None
val_lower = val_str.lower()
for entry in entries:
text = str(entry.get("text") or "").strip()
value = str(entry.get("value") or "").strip()
aliases = [str(a).strip() for a in entry.get("aliases", []) if a is not None]
if val_lower in {text.lower(), value.lower()} or val_lower in {a.lower() for a in aliases}:
resolved = value or text or val_str
break
if resolved is None:
resolved = val_str
transform = transforms.get(key)
if callable(transform):
try:
resolved = transform(resolved)
except Exception:
pass
if resolved:
filters[key] = str(resolved)
return filters
__all__ = [ __all__ = [
"ProviderInfo", "ProviderInfo",
"Provider", "Provider",
@@ -423,4 +595,5 @@ __all__ = [
"get_provider_class", "get_provider_class",
"selection_auto_stage_for_table", "selection_auto_stage_for_table",
"download_soulseek_file", "download_soulseek_file",
"provider_inline_query_choices",
] ]

302
SYS/html_table.py Normal file
View File

@@ -0,0 +1,302 @@
"""Small helper utilities for extracting structured records from HTML tables
using lxml.
Goal: make it trivial for provider authors to extract table rows and common
fields (title, link, standardized column keys) without re-implementing the
same heuristics in every provider.
Key functions:
- find_candidate_nodes(doc_or_html, xpaths=...)
- extract_records(doc_or_html, base_url=None, xpaths=...)
- normalize_header(name, synonyms=...)
This module has no hard dependency on heavyweight libraries (pandas is only
used opportunistically in `extract_records` when it happens to be installed)
and works with `lxml.html` elements (the project already uses lxml).
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
from lxml import html as lxml_html
from urllib.parse import urljoin
import re
# Default xpaths for candidate result containers
_DEFAULT_XPATHS = [
"//table//tbody/tr",
"//table//tr[td]",
"//div[contains(@class,'list-item')]",
"//div[contains(@class,'result')]",
"//li[contains(@class,'item')]",
]
# Simple header synonyms (you can extend as needed)
_DEFAULT_SYNONYMS = {
"platform": "system",
"system": "system",
"name": "title",
"title": "title",
}
def _ensure_doc(doc_or_html: Any) -> lxml_html.HtmlElement:
    """Coerce *doc_or_html* to an lxml element, parsing raw HTML strings."""
    if not isinstance(doc_or_html, str):
        # Already an element (or element-like); hand it back untouched.
        return doc_or_html
    return lxml_html.fromstring(doc_or_html)
def _text_or_img_title(el) -> str:
# Prefer img/@title if present (useful for flag icons)
try:
imgs = el.xpath('.//img/@title')
if imgs:
return str(imgs[0]).strip()
except Exception:
pass
return (el.text_content() or "").strip()
def find_candidate_nodes(doc_or_html: Any, xpaths: Optional[List[str]] = None) -> Tuple[List[Any], Optional[str]]:
    """Probe a prioritized xpath list and return ``(nodes, matching_xpath)``.

    The first expression that yields any nodes wins; expressions that raise
    are skipped. Returns ``([], None)`` when nothing matches.
    """
    doc = _ensure_doc(doc_or_html)
    candidates = xpaths if xpaths else _DEFAULT_XPATHS
    for expression in candidates:
        try:
            hits = doc.xpath(expression)
        except Exception:
            continue
        if hits:
            return list(hits), expression
    return [], None
def _parse_tr_nodes(tr_nodes: List[Any], base: Optional[str] = None) -> List[Dict[str, str]]:
    """Extract record dicts from table ``<tr>`` rows, handling two layouts.

    Layout A (detail/file listing): the first cell holds the file anchor
    (title + link) and a later cell holds the size.
    Layout B (search listing, e.g. Vimm-style): cells are
    system | title/link | region | version | languages.

    Rows that raise during parsing are skipped (best-effort scraping).
    When *base* is given, relative hrefs are absolutized against it.
    """
    out: List[Dict[str, str]] = []
    for tr in tr_nodes:
        try:
            tds = tr.xpath("./td")
            if not tds or len(tds) < 1:
                continue
            # canonical fields
            rec: Dict[str, str] = {}
            # Heuristic: if the first cell contains an anchor, treat it as the title/path
            # (detail pages often put the file link in the first column and size in the second).
            a0 = tds[0].xpath('.//a[contains(@href,"/vault/")]') or tds[0].xpath('.//a')
            if a0:
                rec["title"] = (a0[0].text_content() or "").strip()
                href = a0[0].get("href")
                rec["path"] = urljoin(base, href) if href and base else (href or "")
                # Try to find a size cell in the remaining tds (class 'size' is common)
                size_val = None
                for td in tds[1:]:
                    s = td.xpath('.//span[contains(@class,"size")]/text()')
                    if s:
                        size_val = str(s[0]).strip()
                        break
                if not size_val and len(tds) > 1:
                    txt = (tds[1].text_content() or "").strip()
                    # crude size heuristic: contains digits and a unit letter
                    if txt and re.search(r"\d", txt):
                        size_val = txt
                if size_val:
                    rec["size"] = size_val
            else:
                # First cell often "system"/"platform"
                rec["platform"] = _text_or_img_title(tds[0])
                # Title + optional link from second column
                if len(tds) > 1:
                    a = tds[1].xpath('.//a[contains(@href,"/vault/")]') or tds[1].xpath('.//a')
                    if a:
                        rec["title"] = (a[0].text_content() or "").strip()
                        href = a[0].get("href")
                        rec["path"] = urljoin(base, href) if href and base else (href or "")
                    else:
                        rec["title"] = (tds[1].text_content() or "").strip()
                # Additional columns in common Vimm layout
                if len(tds) > 2:
                    rec["region"] = _text_or_img_title(tds[2]).strip()
                if len(tds) > 3:
                    rec["version"] = (tds[3].text_content() or "").strip()
                if len(tds) > 4:
                    rec["languages"] = (tds[4].text_content() or "").strip()
            out.append(rec)
        except Exception:
            # A malformed row must not abort the whole batch.
            continue
    return out
def _parse_list_item_nodes(nodes: List[Any], base: Optional[str] = None) -> List[Dict[str, str]]:
out: List[Dict[str, str]] = []
for node in nodes:
try:
rec: Dict[str, str] = {}
# title heuristics
a = node.xpath('.//h2/a') or node.xpath('.//a')
if a:
rec["title"] = (a[0].text_content() or "").strip()
href = a[0].get("href")
rec["path"] = urljoin(base, href) if href and base else (href or "")
else:
rec["title"] = (node.text_content() or "").strip()
# platform, size
p = node.xpath('.//span[contains(@class,"platform")]/text()')
if p:
rec["platform"] = str(p[0]).strip()
s = node.xpath('.//span[contains(@class,"size")]/text()')
if s:
rec["size"] = str(s[0]).strip()
out.append(rec)
except Exception:
continue
return out
def normalize_header(name: str, synonyms: Optional[Dict[str, str]] = None) -> str:
    """Normalize a column header to a canonical snake_case key.

    Lower-cases, collapses whitespace runs into underscores, then applies
    the synonym table (defaults map 'platform' -> 'system' and
    'name' -> 'title'). Falsy input yields the empty string.
    """
    if not name:
        return ""
    key = re.sub(r"\s+", "_", str(name or "").strip().lower())
    lookup = synonyms or _DEFAULT_SYNONYMS
    return lookup.get(key) or key
def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Optional[List[str]] = None, use_pandas_if_available: bool = True) -> Tuple[List[Dict[str, str]], Optional[str]]:
    """Find result candidate nodes and return ``(records, chosen)``.

    ``chosen`` is the xpath expression that matched, or the string 'pandas'
    when the pandas fast-path parsed the tables. If pandas is installed and
    *use_pandas_if_available* is True, ``pandas.read_html`` is tried first;
    any failure (including pandas being absent) falls back to lxml
    node-based parsing. Record keys are normalized via ``normalize_header``
    and relative links are resolved against *base_url* when provided.
    """
    # Prepare an HTML string for pandas if needed
    html_text: Optional[str] = None
    if isinstance(doc_or_html, (bytes, bytearray)):
        try:
            html_text = doc_or_html.decode("utf-8")
        except Exception:
            html_text = doc_or_html.decode("latin-1", errors="ignore")
    elif isinstance(doc_or_html, str):
        html_text = doc_or_html
    else:
        # Already an lxml element: serialize it back to markup for pandas.
        try:
            html_text = lxml_html.tostring(doc_or_html, encoding="unicode")
        except Exception:
            html_text = str(doc_or_html)
    # Try pandas first when available and requested
    if use_pandas_if_available and html_text is not None:
        try:
            import pandas as _pd  # type: ignore
            dfs = _pd.read_html(html_text)
            if dfs:
                # pick the largest dataframe by row count for heuristics
                df = max(dfs, key=lambda d: getattr(d, "shape", (len(getattr(d, 'index', [])), 0))[0])
                try:
                    rows = df.to_dict("records")
                except Exception:
                    # Some DataFrame-like objects may have slightly different APIs
                    rows = [dict(r) for r in df]
                records: List[Dict[str, str]] = []
                for row in rows:
                    nr: Dict[str, str] = {}
                    for k, v in (row or {}).items():
                        nk = normalize_header(str(k or ""))
                        nr[nk] = (str(v).strip() if v is not None else "")
                    records.append(nr)
                # Attempt to recover hrefs by matching anchor text -> href
                # (pandas discards markup, so links must be re-associated).
                try:
                    doc = lxml_html.fromstring(html_text)
                    anchors = {}
                    for a in doc.xpath('//a'):
                        txt = (a.text_content() or "").strip()
                        href = a.get("href")
                        if txt and href and txt not in anchors:
                            anchors[txt] = href
                    for rec in records:
                        if not rec.get("path") and rec.get("title"):
                            href = anchors.get(rec["title"])
                            if href:
                                rec["path"] = urljoin(base_url, href) if base_url else href
                except Exception:
                    pass
                return records, "pandas"
        except Exception:
            # Pandas not present or parsing failed; fall back to node parsing
            pass
    # Fallback to node-based parsing
    nodes, chosen = find_candidate_nodes(doc_or_html, xpaths=xpaths)
    if not nodes:
        return [], chosen
    # Determine node type and parse accordingly
    first = nodes[0]
    tag = getattr(first, "tag", "").lower()
    if tag == "tr":
        records = _parse_tr_nodes(nodes, base=base_url)
    else:
        # list-item style
        records = _parse_list_item_nodes(nodes, base=base_url)
    # Normalize keys (map platform->system etc)
    normed: List[Dict[str, str]] = []
    for r in records:
        nr: Dict[str, str] = {}
        for k, v in (r or {}).items():
            nk = normalize_header(k)
            nr[nk] = v
        normed.append(nr)
    return normed, chosen
# Small convenience: convert records to SearchResult. Providers can call this or
# use their own mapping when they need full SearchResult objects.
from ProviderCore.base import SearchResult # local import to avoid circular issues
def records_to_search_results(records: List[Dict[str, str]], table: str = "provider") -> List[SearchResult]:
    """Wrap raw record dicts in SearchResult objects for the result pipeline.

    Column cells are derived from every non-empty key/value pair; the raw
    record is preserved under ``full_metadata`` for downstream consumers.
    """
    results: List[SearchResult] = []
    for record in records:
        columns = [(key.title(), value) for key, value in record.items() if key and value]
        results.append(
            SearchResult(
                table=table,
                title=str(record.get("title") or record.get("name") or ""),
                path=str(record.get("path") or ""),
                detail="",
                annotations=[],
                media_kind="file",
                size_bytes=None,
                tag={table},
                columns=columns,
                full_metadata={"raw_record": record, "raw": record},
            )
        )
    return results

View File

@@ -972,6 +972,16 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
return None return None
def get_last_result_table_row_selection_action(row_index: int) -> Optional[List[str]]:
    """Get the expanded stage tokens for a row in the last result table.

    Returns the row's ``selection_action`` (the full stage tokens to run when
    the row is picked via @N), or None when the table is not selectable, the
    index is out of range, or the row carries no action attribute.
    """
    state = _get_pipeline_state()
    # Only consult tables that opted into @N selection and expose rows.
    if _is_selectable_table(state.last_result_table) and hasattr(state.last_result_table, "rows"):
        if 0 <= row_index < len(state.last_result_table.rows):
            row = state.last_result_table.rows[row_index]
            if hasattr(row, "selection_action"):
                return row.selection_action
    return None
def set_current_stage_table(result_table: Optional[Any]) -> None: def set_current_stage_table(result_table: Optional[Any]) -> None:
"""Store the current pipeline stage table for @N expansion. """Store the current pipeline stage table for @N expansion.
@@ -1035,6 +1045,17 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[
return None return None
def get_current_stage_table_row_selection_action(row_index: int) -> Optional[List[str]]:
    """Get the expanded stage tokens for a row in the current stage table.

    Mirrors the last-result-table variant but reads ``current_stage_table``.
    Returns None when the table is not selectable, the index is out of range,
    or the row carries no ``selection_action`` attribute.
    """
    state = _get_pipeline_state()
    # Only consult tables that opted into @N selection and expose rows.
    if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "rows"):
        if 0 <= row_index < len(state.current_stage_table.rows):
            row = state.current_stage_table.rows[row_index]
            if hasattr(row, "selection_action"):
                return row.selection_action
    return None
def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]: def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]:
"""Get the original source index for a row in the current stage table. """Get the original source index for a row in the current stage table.

110
SYS/provider_helpers.py Normal file
View File

@@ -0,0 +1,110 @@
"""Convenience mixins and helpers for table-based providers.
Provides a small `TableProviderMixin` that handles HTTP fetch + table extraction
(using `SYS.html_table.extract_records`) and converts records into
`ProviderCore.base.SearchResult` rows with sane default column ordering.
Providers can subclass this mixin to implement search quickly:
class MyProvider(TableProviderMixin, Provider):
URL = ("https://example.org/search",)
def search(self, query, limit=50, **kwargs):
url = f"{self.URL[0]}?q={quote_plus(query)}"
return self.search_table_from_url(url, limit=limit, xpaths=self.DEFAULT_XPATHS)
The mixin deliberately avoids adding heavy dependencies (uses our lxml helper)
so authors don't have to install pandas/bs4 unless they want to.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from urllib.parse import quote_plus
from API.HTTP import HTTPClient
from ProviderCore.base import SearchResult
from SYS.html_table import extract_records
import lxml.html as lxml_html
class TableProviderMixin:
    """Mixin to simplify providers that scrape table/list results from HTML.

    Subclasses get ``search_table_from_url`` which fetches a page, runs
    ``SYS.html_table.extract_records`` over it, and converts the records
    into ``SearchResult`` rows with a sane default column ordering.
    """

    # Same defaults as the html_table helper.
    DEFAULT_XPATHS: List[str] = [
        "//table//tbody/tr",
        "//table//tr[td]",
        "//div[contains(@class,'list-item')]",
        "//div[contains(@class,'result')]",
        "//li[contains(@class,'item')]",
    ]

    def search_table_from_url(self, url: str, limit: int = 50, xpaths: Optional[List[str]] = None, timeout: float = 15.0) -> List[SearchResult]:
        """Fetch *url*, extract table/list records, return SearchResult list.

        *xpaths* is forwarded to ``extract_records`` (defaults to
        ``DEFAULT_XPATHS``). Network or parse failures yield an empty list.
        """
        if not url:
            return []
        try:
            with HTTPClient(timeout=timeout) as client:
                content = client.get(url).content
        except Exception:
            return []
        doc = self._coerce_document(content)
        records, _chosen = extract_records(doc, base_url=url, xpaths=xpaths or self.DEFAULT_XPATHS)
        provider_name = getattr(self, "name", "provider")
        return [
            self._record_to_result(rec, provider_name)
            for rec in (records or [])[: int(limit)]
        ]

    @staticmethod
    def _coerce_document(content: Any) -> Any:
        """Turn raw response bytes into an lxml doc, or a string fallback."""
        try:
            return lxml_html.fromstring(content)
        except Exception:
            try:
                return content.decode("utf-8")
            except Exception:
                return str(content)

    @staticmethod
    def _record_to_result(rec: Dict[str, str], provider_name: Any) -> SearchResult:
        """Map one extracted record onto a SearchResult with ordered columns."""
        title = rec.get("title") or ""
        cols = [("Title", title)]
        # Optional columns in a fixed display order; empty values are skipped.
        optional = [
            ("Platform", rec.get("system") or rec.get("platform") or ""),
            ("Size", rec.get("size") or ""),
            ("Region", rec.get("region") or ""),
            ("Version", rec.get("version") or ""),
            ("Languages", rec.get("languages") or ""),
        ]
        for header, value in optional:
            if value:
                cols.append((header, value))
        return SearchResult(
            table=(provider_name or "provider"),
            title=title,
            path=rec.get("path") or "",
            detail="",
            annotations=[],
            media_kind="file",
            size_bytes=None,
            tag={provider_name},
            columns=cols,
            full_metadata={"raw_record": rec},
        )

View File

@@ -359,6 +359,8 @@ class ResultRow:
columns: List[ResultColumn] = field(default_factory=list) columns: List[ResultColumn] = field(default_factory=list)
selection_args: Optional[List[str]] = None selection_args: Optional[List[str]] = None
"""Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])""" """Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])"""
selection_action: Optional[List[str]] = None
"""Full expanded stage tokens that should run when this row is selected."""
source_index: Optional[int] = None source_index: Optional[int] = None
"""Original insertion order index (used to map sorted views back to source items).""" """Original insertion order index (used to map sorted views back to source items)."""
payload: Optional[Any] = None payload: Optional[Any] = None
@@ -648,6 +650,11 @@ class ResultTable:
if 0 <= row_index < len(self.rows): if 0 <= row_index < len(self.rows):
self.rows[row_index].selection_args = selection_args self.rows[row_index].selection_args = selection_args
def set_row_selection_action(self, row_index: int, selection_action: List[str]) -> None:
"""Specify the entire stage tokens to run for this row on @N."""
if 0 <= row_index < len(self.rows):
self.rows[row_index].selection_action = selection_action
def set_header_lines(self, lines: List[str]) -> "ResultTable": def set_header_lines(self, lines: List[str]) -> "ResultTable":
"""Attach metadata lines that render beneath the title.""" """Attach metadata lines that render beneath the title."""
self.header_lines = [line for line in lines if line] self.header_lines = [line for line in lines if line]
@@ -827,6 +834,30 @@ class ResultTable:
if hasattr(result, "annotations") and result.annotations: if hasattr(result, "annotations") and result.annotations:
row.add_column("Annotations", ", ".join(str(a) for a in result.annotations)) row.add_column("Annotations", ", ".join(str(a) for a in result.annotations))
try:
md = getattr(result, "full_metadata", None)
md_dict = dict(md) if isinstance(md, dict) else {}
except Exception:
md_dict = {}
try:
selection_args = getattr(result, "selection_args", None)
except Exception:
selection_args = None
if selection_args is None:
selection_args = md_dict.get("_selection_args") or md_dict.get("selection_args")
if selection_args:
row.selection_args = [str(a) for a in selection_args if a is not None]
try:
selection_action = getattr(result, "selection_action", None)
except Exception:
selection_action = None
if selection_action is None:
selection_action = md_dict.get("_selection_action") or md_dict.get("selection_action")
if selection_action:
row.selection_action = [str(a) for a in selection_action if a is not None]
def _add_result_item(self, row: ResultRow, item: Any) -> None: def _add_result_item(self, row: ResultRow, item: Any) -> None:
"""Extract and add ResultItem fields to row (compact display for search results). """Extract and add ResultItem fields to row (compact display for search results).

View File

@@ -10,10 +10,10 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, List, Optional, Union from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from SYS.result_table_api import ColumnSpec, ProviderAdapter, ResultModel from SYS.result_table_api import ColumnSpec, ProviderAdapter, ResultModel, ResultTable, ensure_result_model
ColumnFactory = Callable[[Iterable[ResultModel]], List[ColumnSpec]] ColumnFactory = Callable[[List[ResultModel]], List[ColumnSpec]]
SelectionFn = Callable[[ResultModel], List[str]] SelectionFn = Callable[[ResultModel], List[str]]
@@ -22,33 +22,57 @@ class Provider:
name: str name: str
adapter: ProviderAdapter adapter: ProviderAdapter
# columns can be a static list or a factory that derives columns from sample rows # columns can be a static list or a factory that derives columns from sample rows
columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None columns: Union[List[ColumnSpec], ColumnFactory]
selection_fn: Optional[SelectionFn] = None selection_fn: SelectionFn
metadata: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None
def get_columns(self, rows: Optional[Iterable[ResultModel]] = None) -> List[ColumnSpec]: def get_columns(self, rows: Optional[Iterable[ResultModel]] = None) -> List[ColumnSpec]:
if self.columns is None:
raise ValueError(f"provider '{self.name}' must define columns")
if callable(self.columns): if callable(self.columns):
try:
rows_list = list(rows) if rows is not None else [] rows_list = list(rows) if rows is not None else []
return list(self.columns(rows_list)) cols = list(self.columns(rows_list))
except Exception: else:
# Fall back to a minimal Title column on errors cols = list(self.columns)
return [ColumnSpec("title", "Title", lambda r: r.title)]
if self.columns is not None: if not cols:
return list(self.columns) raise ValueError(f"provider '{self.name}' produced no columns")
# Default minimal column set
return [ColumnSpec("title", "Title", lambda r: r.title)] return cols
def selection_args(self, row: ResultModel) -> List[str]: def selection_args(self, row: ResultModel) -> List[str]:
if callable(self.selection_fn): if not callable(self.selection_fn):
raise ValueError(f"provider '{self.name}' must define a selection function")
sel = list(self.selection_fn(ensure_result_model(row)))
return sel
def build_table(self, items: Iterable[Any]) -> ResultTable:
"""Materialize adapter output into a ResultTable (strict, no legacy types)."""
try: try:
return list(self.selection_fn(row)) rows = [ensure_result_model(r) for r in self.adapter(items)]
except Exception: except Exception as exc:
return [] raise RuntimeError(f"provider '{self.name}' adapter failed") from exc
# Default selector: prefer path flag, then title
if getattr(row, "path", None): cols = self.get_columns(rows)
return ["-path", str(row.path)] return ResultTable(provider=self.name, rows=rows, columns=cols, meta=self.metadata or {})
return ["-title", str(row.title)]
def serialize_row(self, row: ResultModel) -> Dict[str, Any]:
r = ensure_result_model(row)
return {
"title": r.title,
"path": r.path,
"ext": r.ext,
"size_bytes": r.size_bytes,
"metadata": r.metadata or {},
"source": r.source or self.name,
"_selection_args": self.selection_args(r),
}
def serialize_rows(self, rows: Iterable[ResultModel]) -> List[Dict[str, Any]]:
return [self.serialize_row(r) for r in rows]
_PROVIDERS: Dict[str, Provider] = {} _PROVIDERS: Dict[str, Provider] = {}
@@ -58,8 +82,8 @@ def register_provider(
name: str, name: str,
adapter: ProviderAdapter, adapter: ProviderAdapter,
*, *,
columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None, columns: Union[List[ColumnSpec], ColumnFactory],
selection_fn: Optional[SelectionFn] = None, selection_fn: SelectionFn,
metadata: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None,
) -> Provider: ) -> Provider:
name = str(name or "").strip().lower() name = str(name or "").strip().lower()
@@ -67,13 +91,20 @@ def register_provider(
raise ValueError("provider name required") raise ValueError("provider name required")
if name in _PROVIDERS: if name in _PROVIDERS:
raise ValueError(f"provider already registered: {name}") raise ValueError(f"provider already registered: {name}")
if columns is None:
raise ValueError("provider registration requires columns")
if selection_fn is None:
raise ValueError("provider registration requires selection_fn")
p = Provider(name=name, adapter=adapter, columns=columns, selection_fn=selection_fn, metadata=metadata) p = Provider(name=name, adapter=adapter, columns=columns, selection_fn=selection_fn, metadata=metadata)
_PROVIDERS[name] = p _PROVIDERS[name] = p
return p return p
def get_provider(name: str) -> Provider: def get_provider(name: str) -> Provider:
return _PROVIDERS[name.lower()] normalized = str(name or "").lower()
if normalized not in _PROVIDERS:
raise KeyError(f"provider not registered: {name}")
return _PROVIDERS[normalized]
def list_providers() -> List[str]: def list_providers() -> List[str]:

View File

@@ -7,7 +7,7 @@ renderers must use. It intentionally refuses to accept legacy dicts/strings/objs
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Iterable, Optional, Protocol from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -33,6 +33,48 @@ class ResultModel:
source: Optional[str] = None source: Optional[str] = None
@dataclass(frozen=True)
class ResultTable:
"""Concrete, provider-owned table of rows/columns.
This is intentionally minimal: it only stores rows, column specs, and
optional metadata used by renderers. It does not auto-normalize legacy
objects or infer columns.
"""
provider: str
rows: List[ResultModel]
columns: List[ColumnSpec]
meta: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self) -> None:
if not str(self.provider or "").strip():
raise ValueError("provider required for ResultTable")
object.__setattr__(self, "rows", [ensure_result_model(r) for r in self.rows])
if not self.columns:
raise ValueError("columns are required for ResultTable")
object.__setattr__(self, "columns", list(self.columns))
object.__setattr__(self, "meta", dict(self.meta or {}))
def serialize_row(self, row: ResultModel, selection: Optional[List[str]] = None) -> Dict[str, Any]:
"""Convert a row into pipeline-friendly dict (with selection args).
Selection args must be precomputed by the provider; this method only
copies them into the serialized dict.
"""
r = ensure_result_model(row)
return {
"title": r.title,
"path": r.path,
"ext": r.ext,
"size_bytes": r.size_bytes,
"metadata": r.metadata or {},
"source": r.source or self.provider,
"_selection_args": list(selection or []),
}
@dataclass(frozen=True) @dataclass(frozen=True)
class ColumnSpec: class ColumnSpec:
"""Specification for a column that renderers will use. """Specification for a column that renderers will use.
@@ -100,6 +142,7 @@ def metadata_column(key: str, header: Optional[str] = None, format_fn: Optional[
__all__ = [ __all__ = [
"ResultModel", "ResultModel",
"ResultTable",
"ColumnSpec", "ColumnSpec",
"ProviderAdapter", "ProviderAdapter",
"Renderer", "Renderer",

View File

@@ -9,7 +9,7 @@ from __future__ import annotations
from typing import Any, Dict, Iterable, Optional from typing import Any, Dict, Iterable, Optional
from SYS.result_table_api import ColumnSpec, ResultModel, Renderer from SYS.result_table_api import ColumnSpec, ResultModel, ResultTable, Renderer
class RichRenderer(Renderer): class RichRenderer(Renderer):
@@ -65,3 +65,22 @@ def render_to_console(rows: Iterable[ResultModel], columns: Iterable[ColumnSpec]
table = RichRenderer().render(rows, columns, meta) table = RichRenderer().render(rows, columns, meta)
Console().print(table) Console().print(table)
def render_result_table(table: ResultTable, renderer: Optional[Renderer] = None) -> Any:
"""Render a ResultTable with the provided renderer (RichRenderer by default)."""
rend = renderer or RichRenderer()
return rend.render(table.rows, table.columns, table.meta)
def render_result_table_to_console(table: ResultTable, renderer: Optional[Renderer] = None) -> None:
try:
from rich.console import Console
except Exception:
for r in table.rows:
print(" ".join(str((col.extractor(r) or "")) for col in table.columns))
return
console = Console()
console.print(render_result_table(table, renderer))

View File

@@ -66,6 +66,24 @@ def sanitize_metadata_value(value: Any) -> str | None:
return value return value
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
    """Return a filesystem-safe filename derived from *name*.

    Replaces characters that are invalid on Windows (``<>:"/\\|?*`` and
    non-whitespace control characters) with underscores, collapses runs of
    whitespace to single spaces, and trims leading/trailing periods. The
    result is capped at *max_len* characters, re-trimmed so truncation never
    leaves a trailing space or period, and falls back to "download" when
    nothing printable remains.
    """
    text = str(name or "").strip()
    if not text:
        return "download"
    forbidden = set('<>:"/\\|?*')
    # Whitespace control chars (\t, \n, ...) are left for the collapse step
    # below; other control chars are invalid in Windows filenames.
    cleaned = "".join(
        "_" if (c in forbidden or (ord(c) < 32 and not c.isspace())) else c
        for c in text
    )
    cleaned = " ".join(cleaned.split()).strip().strip(".")
    if not cleaned:
        cleaned = "download"
    result = cleaned[:max_len].rstrip(" .")
    return result or "download"
def unique_preserve_order(values: Iterable[str]) -> list[str]: def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set() seen: set[str] = set()
ordered: list[str] = [] ordered: list[str] = []

View File

@@ -374,3 +374,61 @@ class Store:
return bool(ok) if ok is not None else True return bool(ok) if ok is not None else True
except Exception: except Exception:
return False return False
def list_configured_backend_names(config: Optional[Dict[str, Any]]) -> list[str]:
    """Return backend instance names present in the provided config WITHOUT instantiating backends.

    This is a lightweight helper for CLI usage where we only need to know if a
    configured backend exists (e.g., to distinguish a store name from a
    filesystem path) without triggering backend initialization (which may
    perform network calls).

    Behaviour:
    - For each configured store type, returns the per-instance NAME override
      (case-insensitive) when present, otherwise the instance key.
    - Includes a 'temp' alias when a folder backend points to the configured
      'temp' path.
    - Returns a sorted, de-duplicated list; any error yields [].
    """
    try:
        store_cfg = (config or {}).get("store") or {}
        if not isinstance(store_cfg, dict):
            return []
        names: list[str] = []
        for raw_store_type, instances in store_cfg.items():
            if not isinstance(instances, dict):
                continue
            for instance_name, instance_config in instances.items():
                if isinstance(instance_config, dict):
                    # Per-instance NAME override wins over the config key.
                    override_name = _get_case_insensitive(dict(instance_config), "NAME")
                    if override_name:
                        names.append(str(override_name))
                    else:
                        names.append(str(instance_name))
                else:
                    names.append(str(instance_name))
        # Best-effort: alias 'temp' when a folder backend points at config['temp']
        try:
            temp_value = (config or {}).get("temp")
            if temp_value:
                # Resolve both sides so differing spellings of the same path match.
                temp_path = str(Path(str(temp_value)).expanduser().resolve())
                for raw_store_type, instances in store_cfg.items():
                    if not isinstance(instances, dict):
                        continue
                    if _normalize_store_type(str(raw_store_type)) != "folder":
                        continue
                    for instance_name, instance_config in instances.items():
                        if not isinstance(instance_config, dict):
                            continue
                        path_value = instance_config.get("PATH") or instance_config.get("path")
                        if not path_value:
                            continue
                        if str(Path(str(path_value)).expanduser().resolve()) == temp_path:
                            if "temp" not in names:
                                names.append("temp")
        except Exception:
            pass
        return sorted(set(names))
    except Exception:
        return []

View File

@@ -219,17 +219,18 @@ class SharedArgs:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(config) SharedArgs.STORE.choices = SharedArgs.get_store_choices(config)
""" """
try: try:
from Store import Store # Use the non-instantiating helper so autocomplete doesn't trigger backend init.
from Store.registry import list_configured_backend_names
# If no config provided, try to load it # If no config provided, try to load it
if config is None: if config is None:
try: try:
from SYS.config import load_config from SYS.config import load_config
config = load_config()
except Exception: except Exception:
return [] return []
store = Store(config) return list_configured_backend_names(config)
return store.list_backends()
except Exception: except Exception:
# Fallback to empty list if FileStorage isn't available # Fallback to empty list if FileStorage isn't available
return [] return []

View File

@@ -321,9 +321,11 @@ class Add_File(Cmdlet):
is_storage_backend_location = False is_storage_backend_location = False
if location: if location:
try: try:
store_probe = Store(config) # Use a config-only check to avoid instantiating backends (which may perform network checks).
from Store.registry import list_configured_backend_names
is_storage_backend_location = location in ( is_storage_backend_location = location in (
store_probe.list_backends() or [] list_configured_backend_names(config) or []
) )
except Exception: except Exception:
is_storage_backend_location = False is_storage_backend_location = False

View File

@@ -70,6 +70,7 @@ class Download_File(Cmdlet):
"download-http"], "download-http"],
arg=[ arg=[
SharedArgs.URL, SharedArgs.URL,
SharedArgs.PROVIDER,
SharedArgs.PATH, SharedArgs.PATH,
SharedArgs.QUERY, SharedArgs.QUERY,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility. # Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
@@ -121,6 +122,7 @@ class Download_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution method.""" """Main execution method."""
debug(f"[download-file] run invoked with args: {list(args)}")
return self._run_impl(result, args, config) return self._run_impl(result, args, config)
@staticmethod @staticmethod
@@ -889,7 +891,7 @@ class Download_File(Cmdlet):
return expanded_items return expanded_items
def _process_provider_items( def _process_provider_items(self,
*, *,
piped_items: Sequence[Any], piped_items: Sequence[Any],
final_output_dir: Path, final_output_dir: Path,

View File

@@ -1,7 +1,7 @@
from __future__ import annotations from __future__ import annotations
from typing import Any, Dict, Iterable, Optional, Sequence import sys
from pathlib import Path from typing import Any, Dict, Iterable, Sequence
from . import _shared as sh from . import _shared as sh
from SYS.logger import log, debug from SYS.logger import log, debug
@@ -68,47 +68,34 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1 return 1
items = inputs items = inputs
# Build rows
try: try:
rows = list(provider.adapter(items)) table = provider.build_table(items)
except Exception as exc: except Exception as exc:
log(f"Provider adapter failed: {exc}", file=sys.stderr) log(f"Provider '{provider.name}' failed: {exc}", file=sys.stderr)
return 1 return 1
cols = provider.get_columns(rows)
# Emit rows for downstream pipeline consumption (pipable behavior). # Emit rows for downstream pipeline consumption (pipable behavior).
try: try:
for r in rows: for item in provider.serialize_rows(table.rows):
try: try:
item = {
"title": getattr(r, "title", None) or None,
"path": getattr(r, "path", None) or None,
"ext": getattr(r, "ext", None) or None,
"size_bytes": getattr(r, "size_bytes", None) or None,
"metadata": getattr(r, "metadata", None) or {},
"source": getattr(r, "source", None) or provider.name,
"_selection_args": provider.selection_args(r),
}
ctx.emit(item) ctx.emit(item)
except Exception: except Exception:
# Best-effort: continue emitting other rows
continue continue
except Exception: except Exception:
# Non-fatal: continue to rendering even if emission fails # Non-fatal: rendering still happens
pass pass
# Render using RichRenderer # Render using RichRenderer
try: try:
table = RichRenderer().render(rows, cols, provider.metadata) renderable = RichRenderer().render(table.rows, table.columns, table.meta)
try: try:
from rich.console import Console from rich.console import Console
Console().print(table) Console().print(renderable)
except Exception: except Exception:
# Fallback to simple printing # Fallback to simple printing
for r in rows: for r in table.rows:
print(" ".join(str((c.extractor(r) or "")) for c in cols)) print(" ".join(str((c.extractor(r) or "")) for c in table.columns))
except Exception as exc: except Exception as exc:
log(f"Rendering failed: {exc}", file=sys.stderr) log(f"Rendering failed: {exc}", file=sys.stderr)
return 1 return 1
@@ -123,11 +110,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("Invalid -select value; must be an integer", file=sys.stderr) log("Invalid -select value; must be an integer", file=sys.stderr)
return 1 return 1
if select_idx < 0 or select_idx >= len(rows): if select_idx < 0 or select_idx >= len(table.rows):
log("-select out of range", file=sys.stderr) log("-select out of range", file=sys.stderr)
return 1 return 1
selected = rows[select_idx] selected = table.rows[select_idx]
sel_args = provider.selection_args(selected) sel_args = provider.selection_args(selected)
if not run_cmd: if not run_cmd:

View File

@@ -40,7 +40,7 @@ from SYS import pipeline as pipeline_context
# Playwright & Screenshot Dependencies # Playwright & Screenshot Dependencies
# ============================================================================ # ============================================================================
from tool.playwright import HAS_PLAYWRIGHT, PlaywrightTimeoutError, PlaywrightTool from tool.playwright import PlaywrightTimeoutError, PlaywrightTool
try: try:
from SYS.config import resolve_output_dir from SYS.config import resolve_output_dir
@@ -853,12 +853,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0 return 0
if not HAS_PLAYWRIGHT:
log(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install",
file=sys.stderr,
)
return 1
progress = PipelineProgress(pipeline_context) progress = PipelineProgress(pipeline_context)

View File

@@ -241,6 +241,16 @@ class search_file(Cmdlet):
else: else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider" provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
normalized_query = str(query or "").strip()
provider_filters: Dict[str, Any] = {}
try:
normalized_query, provider_filters = provider.extract_query_arguments(query)
except Exception:
provider_filters = {}
normalized_query = (normalized_query or "").strip()
query = normalized_query or "*"
provider_filters = dict(provider_filters or {})
if provider_lower == "alldebrid" and effective_open_id is not None: if provider_lower == "alldebrid" and effective_open_id is not None:
table_title = f"{provider_label} Files: {effective_open_id}".strip().rstrip(":") table_title = f"{provider_label} Files: {effective_open_id}".strip().rstrip(":")
else: else:
@@ -267,17 +277,22 @@ class search_file(Cmdlet):
table.set_table_metadata(table_meta) table.set_table_metadata(table_meta)
except Exception: except Exception:
pass pass
if provider_lower == "vimm":
# Keep auto-staged download-file from inheriting raw query tokens;
# only propagate provider hint so @N expands to a clean downloader call.
table.set_source_command("search-file", ["-provider", provider_name])
else:
table.set_source_command("search-file", list(args_list)) table.set_source_command("search-file", list(args_list))
debug(f"[search-file] Calling {provider_name}.search()") search_filters = dict(provider_filters)
debug(f"[search-file] Calling {provider_name}.search(filters={search_filters})")
if provider_lower == "alldebrid": if provider_lower == "alldebrid":
filters = {"view": "folders"}
search_open_id = parsed_open_id if parsed_open_id is not None else open_id search_open_id = parsed_open_id if parsed_open_id is not None else open_id
view_value = "files" if search_open_id is not None else "folders"
search_filters["view"] = view_value
if search_open_id is not None: if search_open_id is not None:
filters = {"view": "files", "magnet_id": search_open_id} search_filters["magnet_id"] = search_open_id
results = provider.search(query, limit=limit, filters=filters) results = provider.search(query, limit=limit, filters=search_filters or None)
else:
results = provider.search(query, limit=limit)
debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)") debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)")
# HIFI artist UX: if there is exactly one artist match, auto-expand # HIFI artist UX: if there is exactly one artist match, auto-expand
@@ -342,6 +357,10 @@ class search_file(Cmdlet):
if "table" not in item_dict: if "table" not in item_dict:
item_dict["table"] = table_type item_dict["table"] = table_type
# Ensure provider source is present so downstream cmdlets (select) can resolve provider
if "source" not in item_dict:
item_dict["source"] = provider_name
row_index = len(table.rows) row_index = len(table.rows)
table.add_result(search_result) table.add_result(search_result)

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import sys
from typing import Any, Dict, List, Sequence from typing import Any, Dict, List, Sequence
from . import _shared as sh from . import _shared as sh
from SYS.logger import log, debug from SYS.logger import log, debug
@@ -89,28 +90,22 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("No input provided to select; pipe provider-table output or use a cmdlet that emits items.", file=sys.stderr) log("No input provided to select; pipe provider-table output or use a cmdlet that emits items.", file=sys.stderr)
return 1 return 1
# Model-ize items
rows = [_dict_to_result_model(item if isinstance(item, dict) else item) for item in inputs]
# Attempt to detect provider from first item
provider = None
first_src = inputs[0].get("source") if isinstance(inputs[0], dict) else None first_src = inputs[0].get("source") if isinstance(inputs[0], dict) else None
if first_src: if not first_src:
log("Input items must include 'source' to resolve provider for selection.", file=sys.stderr)
return 1
try: try:
provider = get_provider(first_src) provider = get_provider(first_src)
except Exception: except Exception:
provider = None log(f"Unknown provider: {first_src}", file=sys.stderr)
return 1
# Columns: ask provider for column spec if available, else build minimal columns # Model-ize items
if provider: rows = [_dict_to_result_model(item if isinstance(item, dict) else item) for item in inputs]
# Columns: provider must supply them (no legacy defaults)
cols = provider.get_columns(rows) cols = provider.get_columns(rows)
else:
# Minimal columns built from available keys
from SYS.result_table_api import title_column, ext_column
cols = [title_column()]
if any(r.ext for r in rows):
cols.append(ext_column())
# Render table to console # Render table to console
try: try:
@@ -172,26 +167,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"source": raw.source, "source": raw.source,
} }
else: else:
# try to call to_dict or fallback
try: try:
selected = raw.to_dict() selected = raw.to_dict()
except Exception: except Exception:
selected = {"title": getattr(raw, "title", str(raw))} selected = {"title": getattr(raw, "title", str(raw))}
# Ensure selection args exist # Ensure selection args exist using provider's selector only
if not selected.get("_selection_args"): if not selected.get("_selection_args"):
if provider:
try: try:
sel_args = provider.selection_args(rows[idx]) sel_args = provider.selection_args(rows[idx])
selected["_selection_args"] = sel_args selected["_selection_args"] = sel_args
except Exception: except Exception:
selected["_selection_args"] = [] log("Selection args missing and provider selector failed.", file=sys.stderr)
else: return 1
# fallback
if selected.get("path"):
selected["_selection_args"] = ["-path", selected.get("path")]
else:
selected["_selection_args"] = ["-title", selected.get("title") or ""]
selected_items.append(selected) selected_items.append(selected)
except Exception: except Exception:

9
docs/CHANGELOG.md Normal file
View File

@@ -0,0 +1,9 @@
# Changelog
## Unreleased (2026-01-05)
- **docs:** Add `docs/provider_authoring.md` with a Quick Start, examples, and testing guidance for providers that integrate with the strict `ResultTable` API (ResultModel/ColumnSpec/selection_fn).
- **docs:** Add link to `docs/result_table.md` pointing to the provider authoring guide.
- **tests:** Add `tests/test_provider_author_examples.py` validating example provider registration and adapter behavior.
- **notes:** Existing example providers (`Provider/example_provider.py`, `Provider/vimm.py`) are referenced as canonical patterns.

View File

@@ -0,0 +1,15 @@
PR Title: docs: Add Provider authoring doc, examples, and tests
Summary:
- Add `docs/provider_authoring.md` describing the strict `ResultModel`-based provider adapter pattern, `ColumnSpec` usage, `selection_fn`, and `TableProviderMixin` for HTML table scraping.
- Link new doc from `docs/result_table.md`.
- Add `tests/test_provider_author_examples.py` to validate `Provider/example_provider.py` and `Provider/vimm.py` integration with the registry.
Why:
- Provide a short, focused Quick Start to help contributors author providers that integrate with the new strict ResultTable API.
Testing:
- New tests pass locally (provider-related subset).
Notes:
- The change is documentation-first and non-functional, with tests ensuring examples remain valid.

141
docs/provider_authoring.md Normal file
View File

@@ -0,0 +1,141 @@
# Provider authoring: ResultTable & provider adapters ✅
This short guide explains how to write providers that integrate with the *strict* ResultTable API: adapters must yield `ResultModel` instances and providers register via `SYS.result_table_adapters.register_provider` with a column specification and a `selection_fn`.
---
## Quick summary
- Providers register a *provider adapter* (callable that yields `ResultModel`).
- Providers must also provide `columns` (static list or factory) and a `selection_fn` that returns CLI args for a selected row.
- For simple HTML table/list scraping, prefer `TableProviderMixin` from `SYS.provider_helpers` to fetch and extract rows using `SYS.html_table.extract_records`.
## Runtime dependency policy
- Treat required runtime dependencies (e.g., **Playwright**) as mandatory: import them unconditionally and let missing dependencies fail fast at import time. Avoid adding per-call try/except import guards for required modules—these silently hide configuration errors and add bloat.
- Use guarded imports only for truly optional dependencies (e.g., `pandas` for enhanced table parsing) and provide meaningful fallbacks or helpful error messages in those cases.
- Keep provider code minimal and explicit: fail early and document required runtime dependencies in README/installation notes.
---
## Minimal provider template (copy/paste)
```py
# Provider/my_provider.py
from typing import Any, Dict, Iterable, List
from SYS.result_table_api import ResultModel, ColumnSpec, title_column, metadata_column
from SYS.result_table_adapters import register_provider
# Example adapter: convert provider-specific items into ResultModel instances
SAMPLE_ITEMS = [
{"name": "Example File.pdf", "path": "https://example.com/x.pdf", "ext": "pdf", "size": 1024, "source": "myprovider"},
]
def adapter(items: Iterable[Dict[str, Any]]) -> Iterable[ResultModel]:
for it in items:
title = it.get("name") or it.get("title") or str(it.get("path") or "")
yield ResultModel(
title=str(title),
path=str(it.get("path")) if it.get("path") else None,
ext=str(it.get("ext")) if it.get("ext") else None,
size_bytes=int(it.get("size")) if it.get("size") is not None else None,
metadata=dict(it),
source=str(it.get("source")) if it.get("source") else "myprovider",
)
# Optional: build columns dynamically from sample rows
def columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
cols = [title_column()]
# add extra columns if metadata keys exist
if any((r.metadata or {}).get("size") for r in rows):
cols.append(ColumnSpec("size", "Size", lambda r: r.size_bytes or ""))
return cols
# Selection args for `@N` expansion or `select` cmdlet
def selection_fn(row: ResultModel) -> List[str]:
# prefer -path when available
if row.path:
return ["-path", row.path]
return ["-title", row.title or ""]
# Register provider (done at import time)
register_provider("myprovider", adapter, columns=columns_factory, selection_fn=selection_fn)
```
---
## Table scraping: using TableProviderMixin (HTML tables / list-results)
If your provider scrapes HTML tables or list-like results (common on web search pages), use `TableProviderMixin`:
```py
from urllib.parse import quote_plus

from ProviderCore.base import Provider
from SYS.provider_helpers import TableProviderMixin
class MyTableProvider(TableProviderMixin, Provider):
URL = ("https://example.org/search",)
def validate(self) -> bool:
return True
def search(self, query: str, limit: int = 50, **kwargs):
url = f"{self.URL[0]}?q={quote_plus(query)}"
return self.search_table_from_url(url, limit=limit)
```
`TableProviderMixin.search_table_from_url` returns `ProviderCore.base.SearchResult` entries. If you want to integrate this provider with the strict `ResultTable` registry, add a small adapter that converts `SearchResult` -> `ResultModel` and register it using `register_provider` (see `Provider/vimm.py` for a real example).
---
## Columns & selection
- `columns` may be a static `List[ColumnSpec]` or a factory `def cols(rows: List[ResultModel]) -> List[ColumnSpec]` that inspects sample rows.
- `selection_fn` must accept a `ResultModel` and return a `List[str]` representing CLI args (e.g., `['-path', row.path]`). These args are used by `select` and `@N` expansion.
**Tip:** for providers that produce downloadable file rows prefer returning explicit URL args (e.g., `['-url', row.path]`) so the selected URL is clearly identified by downstream downloaders and to avoid ambiguous parsing when provider hints (like `-provider`) are present.
- Ensure your `ResultModel.source` is set (either in the model or rely on the provider name set by `serialize_row`).
---
## Optional: pandas path for `<table>` extraction
`SYS.html_table.extract_records` prefers a pure-lxml path but will use `pandas.read_html` if pandas is installed and the helper detects it works for the input table. This is optional and **not required** to author a provider — document in your provider whether it requires `pandas` and add an informative error/log message when it is missing.
---
## Testing & examples
- Write `tests/test_provider_<name>.py` that imports your provider and verifies `provider.build_table(...)` produces a `ResultTable` (has `.rows` and `.columns`) and that `serialize_rows()` yields dicts with `_selection_args`, `_selection_action` when applicable, and `source`.
- When you need to guarantee a specific CLI stage sequence (e.g., `download-file -url <path> -provider <name>`), call `table.set_row_selection_action(row_index, tokens)` so the serialized payload emits `_selection_action` and the CLI can run the row exactly as intended.
- For table providers you can test `search_table_from_url` using a local HTML fixture or by mocking `HTTPClient` to return a small sample page.
- If you rely on pandas, add a test that monkeypatches `sys.modules['pandas']` to a simple shim to validate the pandas path.
**Example test skeleton**
```py
from SYS.result_table_adapters import get_provider
from Provider import example_provider
def test_example_provider_registration():
provider = get_provider("example")
rows = list(provider.adapter(example_provider.SAMPLE_ITEMS))
assert rows and rows[0].title
cols = provider.get_columns(rows)
assert any(c.name == "title" for c in cols)
table = provider.build_table(example_provider.SAMPLE_ITEMS)
assert table.provider == "example" and table.rows
```
---
## References & examples
- Read `Provider/example_provider.py` for a compact example of a strict adapter and dynamic columns.
- Read `Provider/vimm.py` for a table-provider that uses `TableProviderMixin` and converts `SearchResult``ResultModel` for registration.
- See `docs/provider_guide.md` for a broader provider development checklist.
---
A minimal starter template (`Provider/myprovider_template.py`) plus matching unit tests can be added as a follow-up; until then, use `Provider/example_provider.py` as the canonical copy/paste starting point. 🎯

View File

@@ -13,10 +13,11 @@ This document explains the `ResultTable` system used across the CLI and TUI: how
- **ResultTable** (`SYS/result_table.py`) - **ResultTable** (`SYS/result_table.py`)
- Renders rows as a rich table and stores metadata used for selection expansion. - Renders rows as a rich table and stores metadata used for selection expansion.
- Important APIs: `add_result()`, `set_table()`, `set_source_command()`, `set_row_selection_args()`, `set_table_metadata()`, and `select_interactive()`. - Important APIs: `add_result()`, `set_table()`, `set_source_command()`, `set_row_selection_args()`, `set_row_selection_action()`, `set_table_metadata()`, and `select_interactive()`.
- **ResultRow** - **ResultRow**
- Holds columns plus `selection_args` (used for `@N` expansion) and `payload` (original object). - Holds columns plus `selection_args` (used for `@N` expansion) and `payload` (original object).
- Optionally stores `selection_action`, a full list of CLI tokens to run when `@N` selects this row. When present the CLI honors the explicit action instead of reconstructing it from `source_command` and `selection_args`.
- **Provider selector** - **Provider selector**
- If a provider implements `selector(selected_items, ctx=..., stage_is_last=True)`, it is run first when `@N` is used; if the selector returns `True` it has handled the selection (e.g., drilling into a folder and publishing a new ResultTable). - If a provider implements `selector(selected_items, ctx=..., stage_is_last=True)`, it is run first when `@N` is used; if the selector returns `True` it has handled the selection (e.g., drilling into a folder and publishing a new ResultTable).
@@ -112,7 +113,7 @@ SearchResult(
) )
``` ```
Illustrative file SearchResult (after drilling): 4. Otherwise, for single selections, CLI checks for `row.selection_action` and runs that verbatim if present; otherwise it expands `source_command + source_args + row_selection_args`. For multi-selections, items are piped downstream.
```py ```py
SearchResult( SearchResult(
@@ -217,6 +218,8 @@ Notes:
--- ---
For more detail on ResultTable provider authoring, see `docs/provider_authoring.md`.
If you'd like, I can also: If you'd like, I can also:
- Add provider-specific examples (AllDebrid, Bandcamp) into this doc ✅ - Add provider-specific examples (AllDebrid, Bandcamp) into this doc ✅
- Add a short checklist for PR reviewers when adding new providers - Add a short checklist for PR reviewers when adding new providers

60
scripts/README.md Normal file
View File

@@ -0,0 +1,60 @@
Playwright fetch helper
This helper uses Playwright to drive a browser to click the download button on a Vimm detail page and save the resulting file to disk.
Usage examples
Programmatic usage
- Basic example (Python):
```py
from tool.playwright import PlaywrightTool
tool = PlaywrightTool({})
result = tool.download_file("https://vimm.net/vault/48075", selector="form#dl_form button[type=submit]", out_dir=None, timeout_sec=60)
if result.ok:
print(result.path)
else:
print("Download failed:", result.error)
```
- Shell one-liners (PowerShell / Unix compatible):
  - PowerShell (heredocs are not supported in PowerShell, so use `-c`):
```powershell
python -c "from tool.playwright import PlaywrightTool; import json; r = PlaywrightTool().download_file('https://vimm.net/vault/48075'); print(json.dumps(r.to_dict()))"
```
- Unix shell:
```sh
python -c "from tool.playwright import PlaywrightTool; import json; r=PlaywrightTool().download_file('https://vimm.net/vault/48075'); print(json.dumps(r.to_dict()))"
```
- Download to a specific directory:
```py
tool.download_file("https://vimm.net/vault/48075", out_dir="C:\\tmp")
```
- Pipe the result into `add-file`:
Use one of the shell one-liners above and extract the `path` field from the returned JSON to pass to `CLI.py add-file`. For example, in Unix:
```sh
python -c "from tool.playwright import PlaywrightTool; import json; r = PlaywrightTool().download_file('https://vimm.net/vault/48075'); print(json.dumps(r.to_dict()))" | jq -r .path | xargs -I{} python CLI.py add-file -store default -path "{}"
```
Notes
- The script prints a single JSON line to stdout on completion. On success, `ok` is true and `path` contains the saved file path.
- Provider `Provider.vimm` will use Playwright when HTTP GET fails (4xx/5xx) or on network errors. Playwright is a required runtime dependency for these flows.
- Playwright must be available in the current Python environment; install with `pip install playwright && playwright install`.

View File

@@ -0,0 +1,9 @@
import importlib, traceback
try:
m = importlib.import_module('Provider.vimm')
print('Imported', m)
print('Vimm class:', getattr(m, 'Vimm', None))
except Exception as e:
print('Import failed:', e)
traceback.print_exc()

View File

@@ -0,0 +1,4 @@
from ProviderCore.registry import list_search_providers, list_providers
print('Search providers:', list_search_providers())
print('All providers:', list_providers())

View File

@@ -1,10 +0,0 @@
from pathlib import Path
path = Path("ProviderCore/registry.py")
text = path.read_text()
marker = '"""Provider registry.'
first = text.find(marker)
second = text.find(marker, first + 1)
if second != -1:
trimmed = text[:second].rstrip() + "\n"
path.write_text(trimmed, encoding="utf-8")

View File

@@ -1,3 +0,0 @@
from pathlib import Path
new_content = """"""

View File

@@ -1,29 +1,24 @@
from __future__ import annotations from __future__ import annotations
import contextlib import contextlib
import re
import tempfile
import traceback
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional from pathlib import Path
from typing import Any, Dict, Iterator, Optional, Union
from SYS.logger import debug from SYS.logger import debug
try: from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError from playwright.sync_api import sync_playwright
from playwright.sync_api import sync_playwright
HAS_PLAYWRIGHT = True
_PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
except Exception as exc: # pragma: no cover
HAS_PLAYWRIGHT = False
_PLAYWRIGHT_IMPORT_ERROR = exc
PlaywrightTimeoutError = TimeoutError # type: ignore
sync_playwright = None # type: ignore
# Re-export for consumers (e.g. cmdlets catching navigation timeouts) # Re-export for consumers (e.g. cmdlets catching navigation timeouts)
__all__ = [ __all__ = [
"HAS_PLAYWRIGHT",
"PlaywrightTimeoutError", "PlaywrightTimeoutError",
"PlaywrightTool", "PlaywrightTool",
"PlaywrightDefaults" "PlaywrightDefaults",
"PlaywrightDownloadResult",
] ]
@@ -36,6 +31,36 @@ def _get_nested(config: Dict[str, Any], *path: str) -> Any:
return cur return cur
def _resolve_out_dir(arg_outdir: Optional[Union[str, Path]]) -> Path:
"""Resolve an output directory using config when possible."""
if arg_outdir:
p = Path(arg_outdir)
p.mkdir(parents=True, exist_ok=True)
return p
try:
from SYS.config import load_config, resolve_output_dir
cfg = load_config()
p = resolve_output_dir(cfg)
try:
p.mkdir(parents=True, exist_ok=True)
except Exception:
pass
return p
except Exception:
return Path(tempfile.mkdtemp(prefix="pwdl_"))
def _find_filename_from_cd(cd: str) -> Optional[str]:
if not cd:
return None
m = re.search(r"filename\*?=(?:UTF-8''\s*)?\"?([^\";]+)\"?", cd)
if m:
return m.group(1)
return None
@dataclass(slots=True) @dataclass(slots=True)
class PlaywrightDefaults: class PlaywrightDefaults:
browser: str = "chromium" # chromium|firefox|webkit browser: str = "chromium" # chromium|firefox|webkit
@@ -51,6 +76,24 @@ class PlaywrightDefaults:
ignore_https_errors: bool = True ignore_https_errors: bool = True
@dataclass(slots=True)
class PlaywrightDownloadResult:
ok: bool
path: Optional[Path] = None
url: Optional[str] = None
mode: Optional[str] = None
error: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {
"ok": bool(self.ok),
"path": str(self.path) if self.path else None,
"url": self.url,
"mode": self.mode,
"error": self.error,
}
class PlaywrightTool: class PlaywrightTool:
"""Small wrapper to standardize Playwright defaults and lifecycle. """Small wrapper to standardize Playwright defaults and lifecycle.
@@ -130,12 +173,12 @@ class PlaywrightTool:
) )
def require(self) -> None: def require(self) -> None:
if HAS_PLAYWRIGHT and sync_playwright is not None: """Ensure Playwright is present; raise a helpful RuntimeError if not."""
return try:
detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed") assert sync_playwright is not None
except Exception:
raise RuntimeError( raise RuntimeError(
"playwright is required; install with: pip install playwright; then: playwright install\n" "playwright is required; install with: pip install playwright; then: playwright install"
f"detail: {detail}"
) )
@contextlib.contextmanager @contextlib.contextmanager
@@ -147,6 +190,7 @@ class PlaywrightTool:
viewport_width: Optional[int] = None, viewport_width: Optional[int] = None,
viewport_height: Optional[int] = None, viewport_height: Optional[int] = None,
ignore_https_errors: Optional[bool] = None, ignore_https_errors: Optional[bool] = None,
accept_downloads: bool = False,
) -> Iterator[Any]: ) -> Iterator[Any]:
"""Context manager yielding a Playwright page with sane defaults.""" """Context manager yielding a Playwright page with sane defaults."""
self.require() self.require()
@@ -198,6 +242,7 @@ class PlaywrightTool:
"height": vh "height": vh
}, },
"ignore_https_errors": ihe, "ignore_https_errors": ihe,
"accept_downloads": bool(accept_downloads),
} }
if ua_value is not None: if ua_value is not None:
context_kwargs["user_agent"] = ua_value context_kwargs["user_agent"] = ua_value
@@ -233,6 +278,146 @@ class PlaywrightTool:
except Exception: except Exception:
raise raise
def download_file(
self,
url: str,
*,
selector: str = "form#dl_form button[type=submit]",
out_dir: Optional[Union[str, Path]] = None,
timeout_sec: int = 60,
headless_first: bool = False,
debug_mode: bool = False,
) -> PlaywrightDownloadResult:
"""Download a file by clicking a selector and capturing the response.
The helper mirrors the standalone `scripts/playwright_fetch.py` logic
and tries multiple click strategies (expect_download, tooltip continue,
submitDL, JS/mouse click) to coax stubborn sites.
"""
try:
self.require()
except Exception as exc:
return PlaywrightDownloadResult(ok=False, error=str(exc))
out_path_base = _resolve_out_dir(out_dir)
timeout_ms = max(10_000, int(timeout_sec) * 1000 if timeout_sec is not None else int(self.defaults.navigation_timeout_ms))
nav_timeout_ms = max(timeout_ms, int(self.defaults.navigation_timeout_ms))
selector_timeout_ms = 10_000
# Preserve legacy behaviour: headless_first=False tries headful then headless; True reverses the order.
order = [True, False] if headless_first else [False, True]
seen = set()
modes = []
for m in order:
if m in seen:
continue
seen.add(m)
modes.append(m)
last_error: Optional[str] = None
for mode in modes:
try:
if debug_mode:
debug(f"[playwright] download url={url} selector={selector} headless={mode} out_dir={out_path_base}")
with self.open_page(headless=mode, accept_downloads=True) as page:
page.goto(url, wait_until="networkidle", timeout=nav_timeout_ms)
page.wait_for_selector(selector, timeout=selector_timeout_ms)
self._wait_for_block_clear(page, timeout_ms=6000)
el = page.query_selector(selector)
# 1) Direct click with expect_download
try:
with page.expect_download(timeout=timeout_ms) as dl_info:
if el:
el.click()
else:
page.click(selector)
dl = dl_info.value
filename = dl.suggested_filename or Path(dl.url).name or "download"
out_path = out_path_base / filename
dl.save_as(str(out_path))
return PlaywrightDownloadResult(ok=True, path=out_path, url=dl.url, mode="download")
except PlaywrightTimeoutError:
last_error = "download timeout"
except Exception as click_exc:
last_error = str(click_exc) or last_error
# 2) Tooltip continue flow
try:
btn = page.query_selector("#tooltip4 input[type=button]")
if btn:
btn.click()
with page.expect_download(timeout=timeout_ms) as dl_info:
if el:
el.click()
else:
page.click(selector)
dl = dl_info.value
filename = dl.suggested_filename or Path(dl.url).name or "download"
out_path = out_path_base / filename
dl.save_as(str(out_path))
return PlaywrightDownloadResult(ok=True, path=out_path, url=dl.url, mode="tooltip-download")
except Exception as tooltip_exc:
last_error = str(tooltip_exc) or last_error
# 3) Submit handler that respects tooltip flow
try:
page.evaluate("() => { try { submitDL(document.forms['dl_form'], 'tooltip4'); } catch (e) {} }")
resp = page.wait_for_response(
lambda r: r.status == 200 and any(k.lower() == 'content-disposition' for k in r.headers.keys()),
timeout=timeout_ms,
)
if resp:
out_path = self._save_response(resp, out_path_base)
if out_path:
return PlaywrightDownloadResult(ok=True, path=out_path, url=getattr(resp, "url", None), mode="response")
except Exception as resp_exc:
last_error = str(resp_exc) or last_error
# 4) JS/mouse click and capture response
try:
if el:
try:
page.evaluate("el => el.click()", el)
except Exception:
page.evaluate(f"() => document.querySelector('{selector}').click()")
else:
page.evaluate(f"() => document.querySelector('{selector}').click()")
if el:
try:
box = el.bounding_box()
if box:
page.mouse.move(box['x'] + box['width'] / 2, box['y'] + box['height'] / 2)
page.mouse.click(box['x'] + box['width'] / 2, box['y'] + box['height'] / 2)
except Exception:
pass
resp = page.wait_for_response(
lambda r: r.status == 200 and any(k.lower() == 'content-disposition' for k in r.headers.keys()),
timeout=timeout_ms,
)
if resp:
out_path = self._save_response(resp, out_path_base)
if out_path:
return PlaywrightDownloadResult(ok=True, path=out_path, url=getattr(resp, "url", None), mode="response-fallback")
except Exception as final_exc:
last_error = str(final_exc) or last_error
except Exception as exc:
last_error = str(exc)
if debug_mode:
try:
debug(f"[playwright] attempt failed (headless={mode}): {traceback.format_exc()}")
except Exception:
pass
continue
return PlaywrightDownloadResult(ok=False, error=last_error or "no download captured")
def debug_dump(self) -> None: def debug_dump(self) -> None:
try: try:
debug( debug(
@@ -242,3 +427,34 @@ class PlaywrightTool:
) )
except Exception: except Exception:
pass pass
def _wait_for_block_clear(self, page: Any, timeout_ms: int = 8000) -> bool:
try:
page.wait_for_function(
"() => { for (const k in window) { if (Object.prototype.hasOwnProperty.call(window, k) && k.startsWith('blocked_')) { try { return window[k] === false; } catch(e) {} return false; } } return true; }",
timeout=timeout_ms,
)
return True
except Exception:
return False
def _save_response(self, response: Any, out_dir: Path) -> Optional[Path]:
try:
cd = ""
try:
headers = getattr(response, "headers", {}) or {}
cd = "".join([v for k, v in headers.items() if str(k).lower() == "content-disposition"])
except Exception:
cd = ""
filename = _find_filename_from_cd(cd) or Path(str(getattr(response, "url", "") or "")).name or "download"
body = response.body()
out_path = out_dir / filename
out_path.write_bytes(body)
return out_path
except Exception as exc:
try:
debug(f"[playwright] failed to save response: {exc}")
except Exception:
pass
return None