HTTP: prefer pip-system-certs/certifi_win32 bundle; use init-time verify in retries; add tests

This commit is contained in:
2026-01-05 13:09:24 -08:00
parent 1f765cffda
commit ac1d1d634f
12 changed files with 19424 additions and 2371 deletions

View File

@@ -14,7 +14,8 @@ import sys
import time
import traceback
import re
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set
import os
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set, Union
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging
@@ -30,6 +31,116 @@ except Exception: # pragma: no cover - optional dependency
logger = logging.getLogger(__name__)
def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
"""Return the httpx verify argument, preferring system-aware bundles.
Order of precedence:
1. If verify_ssl is not True (False or path), return it.
2. Respect existing SSL_CERT_FILE env var if present.
3. Prefer `pip_system_certs` if present and it exposes a bundle path.
4. Prefer `certifi_win32`/similar helpers by invoking them and reading certifi.where().
5. Fall back to `certifi.where()` if available.
6. Otherwise, return True to let httpx use system defaults.
"""
if verify_ssl is not True:
return verify_ssl
env_cert = os.environ.get("SSL_CERT_FILE")
if env_cert:
return env_cert
def _try_module_bundle(mod_name: str) -> Optional[str]:
try:
mod = __import__(mod_name)
except Exception:
return None
# Common APIs that return a bundle path
for attr in ("where", "get_ca_bundle", "bundle_path", "get_bundle_path", "get_bundle"):
fn = getattr(mod, attr, None)
if callable(fn):
try:
res = fn()
if res:
return res
except Exception:
continue
elif isinstance(fn, str) and fn:
return fn
# Some helpers (e.g., certifi_win32) expose an action to merge system certs
for call_attr in ("add_windows_store_certs", "add_system_certs", "merge_system_certs"):
fn = getattr(mod, call_attr, None)
if callable(fn):
try:
fn()
try:
import certifi as _certifi
res = _certifi.where()
if res:
return res
except Exception:
pass
except Exception:
pass
return None
# Prefer pip_system_certs if available
for mod_name in ("pip_system_certs",):
path = _try_module_bundle(mod_name)
if path:
try:
os.environ["SSL_CERT_FILE"] = path
except Exception:
pass
logger.info(f"SSL_CERT_FILE not set; using bundle from {mod_name}: {path}")
return path
# Special-case helpers that merge system certs (eg. certifi_win32)
try:
import certifi_win32 as _cw # type: ignore
if hasattr(_cw, "add_windows_store_certs") and callable(_cw.add_windows_store_certs):
try:
_cw.add_windows_store_certs()
except Exception:
pass
try:
import certifi # type: ignore
path = certifi.where()
if path:
try:
os.environ["SSL_CERT_FILE"] = path
except Exception:
pass
logger.info(
f"SSL_CERT_FILE not set; using certifi bundle after certifi_win32: {path}"
)
return path
except Exception:
pass
except Exception:
pass
# Fallback to certifi
try:
import certifi # type: ignore
path = certifi.where()
if path:
try:
os.environ["SSL_CERT_FILE"] = path
except Exception:
pass
logger.info(f"SSL_CERT_FILE not set; using certifi bundle: {path}")
return path
except Exception:
pass
return True
# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
@@ -65,11 +176,13 @@ class HTTPClient:
self.base_headers = headers or {}
self._client: Optional[httpx.Client] = None
self._httpx_verify = _resolve_verify_value(verify_ssl)
def __enter__(self):
"""Context manager entry."""
self._client = httpx.Client(
timeout=self.timeout,
verify=self.verify_ssl,
verify=self._httpx_verify,
headers=self._get_headers(),
)
return self
@@ -351,6 +464,53 @@ class HTTPClient:
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
# Detect certificate verification failures in the underlying error
msg = str(e or "").lower()
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
if attempt < self.retries - 1:
continue
except Exception as e:
# Catch-all to handle non-httpx exceptions that may represent
# certificate verification failures from underlying transports.
last_exception = e
logger.warning(f"Request exception on attempt {attempt + 1}/{self.retries}: {url} - {e}")
msg = str(e or "").lower()
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
if attempt < self.retries - 1:
continue
@@ -761,12 +921,13 @@ class AsyncHTTPClient:
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.AsyncClient] = None
self._httpx_verify = _resolve_verify_value(verify_ssl)
async def __aenter__(self):
"""Async context manager entry."""
self._client = httpx.AsyncClient(
timeout=self.timeout,
verify=self.verify_ssl,
verify=self._httpx_verify,
headers=self._get_headers(),
)
return self

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,259 @@
"""Example provider that uses the new `ResultTable` API.
This module demonstrates a minimal provider adapter that yields `ResultModel`
instances, a set of `ColumnSpec` definitions, and a tiny CLI-friendly renderer
(`render_table`) for demonstration.
Run this to see sample output:
python -m Provider.example_provider
Example usage (piped selector):
provider-table -provider example -sample | select -select 1 | add-file -store default
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Iterable, List
from SYS.result_table_api import ColumnSpec, ResultModel, title_column, ext_column
SAMPLE_ITEMS = [
{
"name": "Book of Awe.pdf",
"path": "sample/Book of Awe.pdf",
"ext": "pdf",
"size": 1024000,
"source": "example",
},
{
"name": "Song of Joy.mp3",
"path": "sample/Song of Joy.mp3",
"ext": "mp3",
"size": 5120000,
"source": "example",
},
{
"name": "Cover Image.jpg",
"path": "sample/Cover Image.jpg",
"ext": "jpg",
"size": 20480,
"source": "example",
},
]
def adapter(items: Iterable[Dict[str, Any]]) -> Iterable[ResultModel]:
    """Yield one `ResultModel` per provider item.

    Only `ResultModel` instances are produced (never raw dicts). The title is
    taken from "name", then "title", then the stem of "path"; the whole item
    is copied into `metadata`.
    """
    for record in items:
        raw_path = record.get("path")
        raw_ext = record.get("ext")
        raw_size = record.get("size")
        raw_source = record.get("source")

        label = record.get("name") or record.get("title")
        if not label:
            label = Path(str(raw_path)).stem if raw_path else ""

        yield ResultModel(
            title=str(label),
            path=str(raw_path) if raw_path else None,
            ext=str(raw_ext) if raw_ext else None,
            size_bytes=None if raw_size is None else int(raw_size),
            metadata=dict(record),
            source=str(raw_source) if raw_source else "example",
        )
# Columns are intentionally *not* mandated. Create a factory that inspects
# sample rows and builds only columns that make sense for the provider data.
from SYS.result_table_api import metadata_column
def columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
    """Build the column list for *rows*, adding only columns the data supports.

    Always starts with the Title column. Ext and Size are added when at least
    one row has a truthy `ext` / `size_bytes`. Up to three further metadata
    keys are then appended as optional columns.

    Metadata keys that are already shown by a dedicated column are skipped:
    the adapter copies the whole source item into `metadata`, so without this
    the "ext" and "size" keys would come back as duplicate columns.
    """
    cols: List[ColumnSpec] = [title_column()]

    if any(getattr(r, "ext", None) for r in rows):
        cols.append(ext_column())

    if any(getattr(r, "size_bytes", None) for r in rows):
        cols.append(ColumnSpec("size", "Size", lambda rr: rr.size_bytes or "", lambda v: _format_size(v)))

    # Identity-like keys plus anything already rendered by a column above.
    skip = {"name", "title", "path"} | {c.name for c in cols}

    seen_keys: List[str] = []
    for r in rows:
        if len(seen_keys) >= 3:
            break
        for k in (r.metadata or {}):
            if k in skip or k in seen_keys:
                continue
            seen_keys.append(k)
            if len(seen_keys) >= 3:
                break

    cols.extend(metadata_column(k) for k in seen_keys)
    return cols
# Selection function: cmdlets rely on this to build selector args when the user
# selects a row (e.g., '@3' -> run next-cmd with the returned args). Prefer
# -path if available, otherwise fall back to -title.
def selection_fn(row: ResultModel) -> List[str]:
    """Return selector args for *row*: `-path` when a path is set, else `-title`."""
    return ["-path", row.path] if row.path else ["-title", row.title]
# Register the provider with the registry so callers can discover it by name
from SYS.result_table_adapters import register_provider
register_provider(
"example",
adapter,
columns=columns_factory,
selection_fn=selection_fn,
metadata={"description": "Example provider demonstrating dynamic columns and selectors"},
)
def _format_size(size: Any) -> str:
try:
s = int(size)
except Exception:
return ""
if s >= 1024 ** 3:
return f"{s / (1024 ** 3):.2f} GB"
if s >= 1024 ** 2:
return f"{s / (1024 ** 2):.2f} MB"
if s >= 1024:
return f"{s / 1024:.2f} KB"
return f"{s} B"
def render_table(rows: Iterable[ResultModel], columns: List[ColumnSpec]) -> str:
    """Render *rows* as a plain ASCII table using *columns*.

    Deliberately tiny and dependency-free, for demonstration. The output is a
    header line, a dashed separator, then one line per row; each column is
    padded to the widest of its header and cells.

    Every cell is coerced to `str`, so a `format_fn` that returns a non-string
    can no longer break width computation. An extractor that raises produces
    an empty cell instead of aborting the whole table.
    """
    rows = list(rows)
    columns = list(columns)  # iterated several times below

    def _cell(col: ColumnSpec, row: ResultModel) -> str:
        try:
            raw = col.extractor(row)
        except Exception:
            return ""
        if col.format_fn:
            try:
                formatted = col.format_fn(raw)
            except Exception:
                return str(raw or "")
            return "" if formatted is None else str(formatted)
        return str(raw or "")

    # Build cell matrix (strings)
    matrix: List[List[str]] = [[_cell(col, r) for col in columns] for r in rows]

    # Compute column widths as max(header, content)
    headers = [str(c.header) for c in columns]
    widths = [len(h) for h in headers]
    for row_cells in matrix:
        for i, cell in enumerate(row_cells):
            widths[i] = max(widths[i], len(cell))

    def fmt_row(cells: List[str]) -> str:
        return " | ".join(cell.ljust(widths[i]) for i, cell in enumerate(cells))

    lines: List[str] = [fmt_row(headers), "-+-".join("-" * w for w in widths)]
    lines.extend(fmt_row(row_cells) for row_cells in matrix)
    return "\n".join(lines)
# Rich-based renderer (returns a Rich Table renderable)
def render_table_rich(rows: Iterable[ResultModel], columns: List[ColumnSpec]):
    """Build a `rich.table.Table` from *rows* and *columns*.

    The table is returned, not printed; callers pass it to `Console.print`.
    Raises `RuntimeError` when `rich.table` cannot be imported.
    """
    try:
        from rich.table import Table as RichTable
    except Exception as exc:  # pragma: no cover - rare if rich missing
        raise RuntimeError("rich is required for rich renderer") from exc

    def _text(spec, row):
        # Formatter output is used as-is; on formatter failure (or when there
        # is no formatter) fall back to the stringified raw value.
        value = spec.extractor(row)
        if not spec.format_fn:
            return str(value or "")
        try:
            return spec.format_fn(value)
        except Exception:
            return str(value or "")

    grid = RichTable(show_header=True, header_style="bold")
    for spec in columns:
        grid.add_column(spec.header)

    for row in rows:
        grid.add_row(*[_text(spec, row) for spec in columns])

    return grid
def demo() -> None:
    """Print the sample items as a table, using rich when it is available.

    The rich table is built inside the guarded section: `render_table_rich`
    raises `RuntimeError` when rich is missing, and building it before the
    import check made the plain-text fallback unreachable.
    """
    rows = list(adapter(SAMPLE_ITEMS))
    cols = columns_factory(rows)

    try:
        from rich.console import Console

        table = render_table_rich(rows, cols)
    except Exception:
        # Fall back to plain printing if rich is not available
        print("Example provider output:")
        print(render_table(rows, cols))
        return

    console = Console()
    console.print("Example provider output:")
    console.print(table)
def demo_with_selection(idx: int = 0) -> None:
    """Show the table for the registered "example" provider plus one row's selector args.

    Steps: look the provider up by name, adapt `SAMPLE_ITEMS` into rows, ask
    the provider for its columns, render (rich if importable, plain text
    otherwise), then print the selection args for row *idx* - the args a
    cmdlet would append when the user picks that row.
    """
    from SYS.result_table_adapters import get_provider

    example = get_provider("example")
    models = list(example.adapter(SAMPLE_ITEMS))
    specs = example.get_columns(models)

    try:
        from rich.console import Console
    except Exception:
        Console = None

    if Console is None:
        # Plain-text path
        print(render_table(models, specs))
        chosen_args = example.selection_args(models[idx])
        print("Selection args for row", idx, "->", chosen_args)
        return

    out = Console()
    out.print("Example provider output:")
    out.print(render_table_rich(models, specs))

    chosen_args = example.selection_args(models[idx])
    out.print("Selection args for row", idx, "->", chosen_args)
if __name__ == "__main__":
demo()

View File

@@ -5,7 +5,7 @@ starting point for implementing a full Vimm (vimm.net) provider.
It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""
@@ -78,6 +78,11 @@ class Vimm(Provider):
resp = client.get(url)
content = resp.content
except Exception as exc:
# Log and return empty results on failure. The HTTP client will
# already attempt a certifi-based retry in common certificate
# verification failure cases; if you still see cert errors, install
# the `certifi` package or configure SSL_CERT_FILE to point at a
# valid CA bundle.
log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
return []
@@ -183,3 +188,104 @@ class Vimm(Provider):
continue
return results[: max(0, int(limit))]
# Bridge into the ResultTable provider registry so vimm results can be rendered
# with the new provider/table/select API.
try:
    from SYS.result_table_adapters import register_provider
    from SYS.result_table_api import ColumnSpec, ResultModel
    from SYS.result_table_api import title_column, ext_column, metadata_column

    def _convert_search_result_to_model(sr):
        """Convert a search result (object with `to_dict`, dict, or other object) to a `ResultModel`.

        The extension comes from the path suffix and falls back to an explicit
        "ext" entry when the path has none. A size that cannot be converted to
        `int` is dropped rather than failing the whole adapter run.
        """
        try:
            if hasattr(sr, "to_dict"):
                d = sr.to_dict()
            elif isinstance(sr, dict):
                d = sr
            else:
                d = {
                    "title": getattr(sr, "title", str(sr)),
                    "path": getattr(sr, "path", None),
                    "size_bytes": getattr(sr, "size_bytes", None),
                    "columns": getattr(sr, "columns", None),
                    "full_metadata": getattr(sr, "full_metadata", None),
                }
        except Exception:
            d = {"title": getattr(sr, "title", str(sr))}

        title = d.get("title") or ""
        path = d.get("path") or None
        size = d.get("size_bytes") or None

        ext = None
        try:
            if path:
                from pathlib import Path

                suf = Path(str(path)).suffix
                if suf:
                    ext = suf.lstrip(".")
        except Exception:
            ext = None
        if not ext:
            # Path had no usable suffix: honour an explicitly supplied extension.
            ext = d.get("ext") or None

        try:
            size_bytes = int(size) if size is not None else None
        except (TypeError, ValueError):
            size_bytes = None

        metadata = d.get("full_metadata") or d.get("metadata") or {}

        return ResultModel(
            title=str(title),
            path=str(path) if path is not None else None,
            ext=str(ext) if ext is not None else None,
            size_bytes=size_bytes,
            metadata=metadata or {},
            source="vimm",
        )

    def _adapter(items):
        """Yield a `ResultModel` for every item in *items*."""
        for it in items:
            yield _convert_search_result_to_model(it)

    def _columns_factory(rows):
        """Build Title, optional Ext/Size, and up to two discovered metadata columns."""
        cols = [title_column()]
        if any(getattr(r, "ext", None) for r in rows):
            cols.append(ext_column())
        if any(getattr(r, "size_bytes", None) for r in rows):
            # The size lives on the model itself; the adapter never stores a
            # "size" key in metadata, so a metadata column here was always blank.
            cols.append(ColumnSpec("size", "Size", lambda r: r.size_bytes or ""))

        # Add up to 2 discovered metadata keys, skipping ones already shown.
        skip = {"name", "title", "path"} | {c.name for c in cols}
        seen = []
        for r in rows:
            if len(seen) >= 2:
                break
            for k in (r.metadata or {}):
                if k in skip or k in seen:
                    continue
                seen.append(k)
                if len(seen) >= 2:
                    break

        for k in seen:
            cols.append(metadata_column(k))
        return cols

    def _selection_fn(row):
        """Return `-path` selector args when the row has a path, else `-title`."""
        if getattr(row, "path", None):
            return ["-path", row.path]
        return ["-title", row.title or ""]

    SAMPLE_ITEMS = [
        {"title": "Room of Awe", "path": "sample/Room of Awe", "ext": "zip", "size_bytes": 1024 * 1024 * 12, "full_metadata": {"platform": "PC"}},
        {"title": "Song of Joy", "path": "sample/Song of Joy.mp3", "ext": "mp3", "size_bytes": 5120000, "full_metadata": {"platform": "PC"}},
        {"title": "Cover Image", "path": "sample/Cover.jpg", "ext": "jpg", "size_bytes": 20480, "full_metadata": {}},
    ]

    try:
        register_provider(
            "vimm",
            _adapter,
            columns=_columns_factory,
            selection_fn=_selection_fn,
            metadata={"description": "Vimm provider bridge (ProviderCore -> ResultTable API)"},
        )
    except Exception:
        # Non-fatal: registration is best-effort
        pass
except Exception:
    # The bridge is optional: if the ResultTable modules cannot be imported
    # the provider itself must keep working.
    pass

View File

@@ -0,0 +1,80 @@
"""Provider registry for ResultTable API (breaking, strict API).
Providers register themselves here with an adapter and optional column factory
and selection function. Consumers (cmdlets) can look up providers by name and
obtain the columns and selection behavior for building tables and for selection
args used by subsequent cmdlets.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from SYS.result_table_api import ColumnSpec, ProviderAdapter, ResultModel
ColumnFactory = Callable[[Iterable[ResultModel]], List[ColumnSpec]]
SelectionFn = Callable[[ResultModel], List[str]]
@dataclass
class Provider:
    """A registered provider: its adapter plus optional column and selection hooks."""

    name: str
    adapter: ProviderAdapter
    # Either a ready-made list of columns or a factory called with the rows.
    columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None
    selection_fn: Optional[SelectionFn] = None
    metadata: Optional[Dict[str, Any]] = None

    def get_columns(self, rows: Optional[Iterable[ResultModel]] = None) -> List[ColumnSpec]:
        """Return the columns for *rows*.

        A callable `columns` is invoked with the rows as a list (empty when
        *rows* is None); if it raises, a single Title column is returned.
        A static `columns` value is returned as a new list. With no `columns`
        configured, the result is a single Title column.
        """
        source = self.columns
        if source is None:
            return [ColumnSpec("title", "Title", lambda r: r.title)]
        if not callable(source):
            return list(source)
        try:
            materialized = [] if rows is None else list(rows)
            return list(source(materialized))
        except Exception:
            # Factory failed: degrade to the minimal Title column.
            return [ColumnSpec("title", "Title", lambda r: r.title)]

    def selection_args(self, row: ResultModel) -> List[str]:
        """Return the selector args for *row*.

        Uses `selection_fn` when it is callable (an exception there yields an
        empty list); otherwise `-path` if the row has a truthy path, else `-title`.
        """
        chooser = self.selection_fn
        if not callable(chooser):
            if getattr(row, "path", None):
                return ["-path", str(row.path)]
            return ["-title", str(row.title)]
        try:
            return list(chooser(row))
        except Exception:
            return []
_PROVIDERS: Dict[str, Provider] = {}
def register_provider(
    name: str,
    adapter: ProviderAdapter,
    *,
    columns: Optional[Union[List[ColumnSpec], ColumnFactory]] = None,
    selection_fn: Optional[SelectionFn] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Provider:
    """Create a `Provider`, store it in the registry, and return it.

    The name is stripped and lower-cased before use.

    Raises:
        ValueError: if the normalised name is empty or already registered.
    """
    key = str(name or "").strip().lower()
    if not key:
        raise ValueError("provider name required")
    if key in _PROVIDERS:
        raise ValueError(f"provider already registered: {key}")

    entry = Provider(
        name=key,
        adapter=adapter,
        columns=columns,
        selection_fn=selection_fn,
        metadata=metadata,
    )
    _PROVIDERS[key] = entry
    return entry
def get_provider(name: str) -> Provider:
    """Return the provider registered under *name*.

    The name is normalised the same way `register_provider` normalises it
    (stripped and lower-cased), so lookups tolerate surrounding whitespace.

    Raises:
        KeyError: if no provider is registered under that name.
    """
    key = str(name or "").strip().lower()
    try:
        return _PROVIDERS[key]
    except KeyError:
        raise KeyError(f"unknown provider: {name!r}") from None
def list_providers() -> List[str]:
    """Return the names of all registered providers as a new list."""
    return list(_PROVIDERS)

109
SYS/result_table_api.py Normal file
View File

@@ -0,0 +1,109 @@
"""ResultTable API types and small helpers (breaking: no legacy compatibility).
This module provides the canonical dataclasses and protocols that providers and
renderers must use. It intentionally refuses to accept legacy dicts/strings/objs
— adapters must produce `ResultModel` instances.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Iterable, Optional, Protocol
@dataclass(frozen=True)
class ResultModel:
    """Canonical, frozen result record that every provider must produce.

    Only `title` is required; every other field defaults to None, except
    `metadata`, which defaults to a fresh empty dict per instance.
    """

    # Human-friendly title (required).
    title: str
    # Optional filesystem path or URL.
    path: Optional[str] = None
    # File extension without the dot, e.g. "pdf", "mp3".
    ext: Optional[str] = None
    # Optional size in bytes.
    size_bytes: Optional[int] = None
    # One of 'video', 'audio', 'image', 'doc', etc.
    media_kind: Optional[str] = None
    # Arbitrary provider-specific metadata.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Provider name string.
    source: Optional[str] = None
@dataclass(frozen=True)
class ColumnSpec:
    """Frozen description of one table column, consumed by renderers."""

    name: str
    header: str
    # Called with a ResultModel; returns the raw cell value.
    extractor: Callable[[ResultModel], Any]
    # Optional: turns the extracted value into the displayed string.
    format_fn: Optional[Callable[[Any], str]] = None
    width: Optional[int] = None
    sortable: bool = False
ProviderAdapter = Callable[[Iterable[Any]], Iterable[ResultModel]]
"""Type for provider adapters.
Adapters must accept provider-specific sequence/iterable and yield
`ResultModel` instances only. Anything else is an error (no implicit normalization).
"""
class Renderer(Protocol):
    """Structural interface for table renderers.

    An implementation takes rows and columns (plus optional `meta`) and either
    returns a renderable or performs the output itself. Prefer deterministic,
    side-effect-free implementations that return a Rich renderable.
    """

    def render(
        self,
        rows: Iterable[ResultModel],
        columns: Iterable[ColumnSpec],
        meta: Optional[Dict[str, Any]] = None,
    ) -> Any:  # pragma: no cover - interface
        ...
# Small helper enforcing strict API usage
def ensure_result_model(obj: Any) -> ResultModel:
    """Return *obj* unchanged if it is a `ResultModel`; otherwise raise `TypeError`.

    Keeps the API strict: providers have to build `ResultModel` objects
    themselves, nothing is normalised implicitly.
    """
    if not isinstance(obj, ResultModel):
        raise TypeError("ResultModel required; providers must yield ResultModel instances")
    return obj
# Convenience column spec generators
def title_column() -> ColumnSpec:
    """Return the "Title" column, which shows each row's `title`."""

    def _title(r):
        return r.title

    return ColumnSpec("title", "Title", _title)
def ext_column() -> ColumnSpec:
    """Return the "Ext" column, which shows each row's `ext` (empty string when unset)."""

    def _ext(r):
        return r.ext or ""

    return ColumnSpec("ext", "Ext", _ext)
# Helper to build a ColumnSpec that extracts a metadata key from ResultModel
def metadata_column(key: str, header: Optional[str] = None, format_fn: Optional[Callable[[Any], str]] = None) -> ColumnSpec:
    """Return a column that shows `metadata[key]` for each row.

    When *header* is empty the header is derived from *key*: underscores
    become spaces and the result is title-cased. A row without the key (or
    with falsy metadata) yields None.
    """
    if header:
        caption = header
    else:
        caption = str(key).replace("_", " ").title()

    def _lookup(r):
        return (r.metadata or {}).get(key)

    return ColumnSpec(name=key, header=caption, extractor=_lookup, format_fn=format_fn)
# Public API of this module. `metadata_column` is a public helper imported by
# other modules in this commit, so it is exported alongside the other column builders.
__all__ = [
    "ResultModel",
    "ColumnSpec",
    "ProviderAdapter",
    "Renderer",
    "ensure_result_model",
    "title_column",
    "ext_column",
    "metadata_column",
]

View File

@@ -0,0 +1,67 @@
"""Renderers for the ResultTable API.
This module provides a Rich-based Renderer implementation that returns a
`rich.table.Table` renderable. The implementation is intentionally small and
focused on the command-line display use-case; keep logic side-effect-free where
possible and let callers decide whether to `Console.print()` or capture output.
"""
from __future__ import annotations
from typing import Any, Dict, Iterable, Optional
from SYS.result_table_api import ColumnSpec, ResultModel, Renderer
class RichRenderer(Renderer):
    """`Renderer` implementation that produces a `rich.table.Table`.

    Usage:
        from rich.console import Console
        table = RichRenderer().render(rows, columns, meta)
        Console().print(table)
    """

    def render(self, rows: Iterable[ResultModel], columns: Iterable[ColumnSpec], meta: Optional[Dict[str, Any]] = None) -> Any:  # pragma: no cover - simple wrapper
        """Return a Rich table with one column per spec and one row per result.

        `meta` is accepted for protocol compatibility and not used here.
        Raises `RuntimeError` when `rich.table` cannot be imported.
        """
        try:
            from rich.table import Table as RichTable
        except Exception as exc:
            raise RuntimeError("rich is required for RichRenderer") from exc

        def _cell(spec, row):
            # Any failure while extracting or stringifying yields an empty
            # cell; a failing formatter falls back to the stringified raw value.
            try:
                value = spec.extractor(row)
                if spec.format_fn:
                    try:
                        return spec.format_fn(value)
                    except Exception:
                        pass
                return str(value or "")
            except Exception:
                return ""

        specs = list(columns)
        grid = RichTable(show_header=True, header_style="bold")
        for spec in specs:
            grid.add_column(spec.header)

        for row in rows:
            grid.add_row(*[_cell(spec, row) for spec in specs])

        return grid
# Small convenience function
def render_to_console(rows: Iterable[ResultModel], columns: Iterable[ColumnSpec], meta: Optional[Dict[str, Any]] = None) -> None:
    """Print *rows* to the terminal as a Rich table, or as plain text without rich.

    `columns` is materialised once up front: it is declared as an arbitrary
    iterable, and the plain-text fallback walks it once per row, so a
    generator would otherwise be exhausted after the first row.
    """
    cols = list(columns)

    try:
        from rich.console import Console
    except Exception:
        # If rich isn't present, fall back to simple text output
        for r in rows:
            print(" ".join(str((col.extractor(r) or "")) for col in cols))
        return

    table = RichRenderer().render(rows, cols, meta)
    Console().print(table)

View File

@@ -128,12 +128,24 @@ def _doc_convert(input_path: Path, output_path: Path) -> bool:
return False
target_fmt = output_path.suffix.lstrip(".").lower() or "pdf"
extra_args = []
if target_fmt == "pdf":
tectonic_path = shutil.which("tectonic")
if not tectonic_path:
log(
"tectonic is required for PDF output; install with `pip install tectonic`",
file=sys.stderr,
)
return False
extra_args = ["--pdf-engine=tectonic"]
try:
pypandoc.convert_file(
str(input_path),
to=target_fmt,
outputfile=str(output_path),
extra_args=extra_args,
)
except OSError as exc:
log(f"pandoc is missing or failed to run: {exc}", file=sys.stderr)
@@ -163,7 +175,7 @@ CMDLET = Cmdlet(
detail=[
"Allows video↔video, audio↔audio, image↔image, doc↔doc, and video→audio conversions.",
"Disallows incompatible conversions (e.g., video→pdf).",
"Uses ffmpeg for media and pypandoc-binary (bundled pandoc) for document formats (mobi/epub→pdf/txt/etc).",
"Uses ffmpeg for media and pypandoc-binary (bundled pandoc) for document formats (mobi/epub→pdf/txt/etc); PDF output uses the tectonic LaTeX engine when available.",
],
)

157
cmdlet/provider_table.py Normal file
View File

@@ -0,0 +1,157 @@
from __future__ import annotations
from typing import Any, Dict, Iterable, Optional, Sequence
from pathlib import Path
from . import _shared as sh
from SYS.logger import log, debug
from SYS import pipeline as ctx
from SYS.result_table_adapters import get_provider
from SYS.result_table_renderers import RichRenderer
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
CMDLET = Cmdlet(
name="provider-table",
summary="Render a provider's result set and optionally run a follow-up cmdlet using the selected row.",
usage="provider-table -provider <name> [-sample] [-select <n>] [-run-cmd <name>]",
arg=[
CmdletArg("provider", type="string", description="Provider name to render (default: example)"),
CmdletArg("sample", type="flag", description="Use provider sample/demo items when available."),
CmdletArg("select", type="int", description="1-based row index to select and use for follow-up command."),
CmdletArg("run-cmd", type="string", description="Cmdlet to invoke with the selected row's selector args."),
],
detail=[
"Use a registered provider to build a table and optionally run another cmdlet with selection args.",
"Emits pipeline-friendly dicts enriched with `_selection_args` so you can pipe into `select` and other cmdlets.",
"Example: provider-table -provider example -sample | select -select 1 | add-file -store default",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Render a provider's rows, emit them to the pipeline, and optionally run a follow-up cmdlet.

    Flow:
      1. Resolve the provider named by `-provider` (default "example").
      2. Take items from the adapter module's `SAMPLE_ITEMS` (`-sample`) or
         from the piped-in `result`.
      3. Adapt items to rows, emit each row as a dict carrying
         `_selection_args`, and print the table.
      4. With `-select`, resolve the 1-based row and either log its selector
         args or pass them to the cmdlet named by `-run-cmd`.

    Returns:
        0 on success, 1 on any reported failure; with `-run-cmd`, whatever
        the follow-up cmdlet returns.
    """
    # Imported here because the module's import block does not bring in `sys`,
    # yet every error path below writes to sys.stderr.
    import sys

    parsed = parse_cmdlet_args(args, CMDLET)

    provider_name = parsed.get("provider") or "example"
    use_sample = bool(parsed.get("sample", False))
    run_cmd = parsed.get("run-cmd")
    select_raw = parsed.get("select")

    try:
        provider = get_provider(provider_name)
    except Exception:
        log(f"Unknown provider: {provider_name}", file=sys.stderr)
        return 1

    # Obtain items to feed to the adapter
    items = None
    if use_sample:
        # Try to locate SAMPLE_ITEMS in the adapter's module (convention only)
        try:
            mod = __import__(provider.adapter.__module__, fromlist=["*"])
            items = getattr(mod, "SAMPLE_ITEMS", None)
            if items is None:
                log("Provider does not expose SAMPLE_ITEMS; no sample available", file=sys.stderr)
                return 1
        except Exception:
            log("Failed to load provider sample", file=sys.stderr)
            return 1
    else:
        # Require input for non-sample runs
        inputs = list(result) if isinstance(result, Iterable) else []
        if not inputs:
            log("No input provided. Use -sample for demo or pipe provider items in.", file=sys.stderr)
            return 1
        items = inputs

    # Build rows
    try:
        rows = list(provider.adapter(items))
    except Exception as exc:
        log(f"Provider adapter failed: {exc}", file=sys.stderr)
        return 1

    cols = provider.get_columns(rows)

    # Emit rows for downstream pipeline consumption (pipable behavior).
    try:
        for r in rows:
            try:
                item = {
                    "title": getattr(r, "title", None) or None,
                    "path": getattr(r, "path", None) or None,
                    "ext": getattr(r, "ext", None) or None,
                    "size_bytes": getattr(r, "size_bytes", None) or None,
                    "metadata": getattr(r, "metadata", None) or {},
                    "source": getattr(r, "source", None) or provider.name,
                    "_selection_args": provider.selection_args(r),
                }
                ctx.emit(item)
            except Exception:
                # Best-effort: continue emitting other rows
                continue
    except Exception:
        # Non-fatal: continue to rendering even if emission fails
        pass

    # Render with rich when possible. RichRenderer raises when rich is missing,
    # so the table is built inside the guarded section to keep the plain-text
    # fallback reachable.
    try:
        try:
            from rich.console import Console

            table = RichRenderer().render(rows, cols, provider.metadata)
            Console().print(table)
        except Exception:
            # Fallback to simple printing
            for r in rows:
                print(" ".join(str((c.extractor(r) or "")) for c in cols))
    except Exception as exc:
        log(f"Rendering failed: {exc}", file=sys.stderr)
        return 1

    # If no selection requested, we're done
    if not select_raw:
        return 0

    try:
        select_idx = int(select_raw) - 1
    except Exception:
        log("Invalid -select value; must be an integer", file=sys.stderr)
        return 1

    if select_idx < 0 or select_idx >= len(rows):
        log("-select out of range", file=sys.stderr)
        return 1

    selected = rows[select_idx]
    sel_args = provider.selection_args(selected)

    if not run_cmd:
        # Print selection args for caller
        log(f"Selection args: {sel_args}", file=sys.stderr)
        return 0

    # Run follow-up cmdlet
    try:
        from cmdlet import ensure_cmdlet_modules_loaded, get as get_cmdlet

        ensure_cmdlet_modules_loaded()
        cmd_fn = get_cmdlet(run_cmd)
        if not cmd_fn:
            log(f"Follow-up cmdlet not found: {run_cmd}", file=sys.stderr)
            return 1

        # Call the cmdlet with no upstream result, but with selection args
        ret = cmd_fn(None, sel_args, config or {})
        return ret
    except Exception as exc:
        log(f"Failed to invoke follow-up cmdlet: {exc}", file=sys.stderr)
        return 1
CMDLET.exec = _run
CMDLET.register()

238
cmdlet/select_item.py Normal file
View File

@@ -0,0 +1,238 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence
from . import _shared as sh
from SYS.logger import log, debug
from SYS import pipeline as ctx
from SYS.result_table_api import ResultModel
from SYS.result_table_adapters import get_provider
from SYS.result_table_renderers import RichRenderer
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
CMDLET = Cmdlet(
name="select",
summary="Select items from a piped result set (interactive or via -select) and emit the selected item(s).",
usage="select -select <n|1-3|1,3> [-multi] [-run-cmd <name>]",
arg=[
CmdletArg("select", type="string", description="Selection string (e.g., 1, 2-4)", alias="s"),
CmdletArg("multi", type="flag", description="Allow multiple selections."),
CmdletArg("interactive", type="flag", description="Prompt interactively for selection."),
CmdletArg("run-cmd", type="string", description="Cmdlet to invoke with selected items (each)"),
],
detail=[
"Accepts piped input from provider-table or other sources and emits the selected item(s) as dicts.",
"If -run-cmd is provided, invokes the named cmdlet for each selected item with selector args and the item as piped input.",
],
)
def _parse_selection(selection: str, max_len: int) -> List[int]:
"""Parse a selection string like '1', '1-3', '1,3,5-7' into 0-based indices."""
if not selection:
return []
parts = [p.strip() for p in str(selection).split(",") if p.strip()]
indices = set()
for part in parts:
if "-" in part:
try:
a, b = part.split("-", 1)
start = int(a.strip())
end = int(b.strip())
if start > end:
start, end = end, start
for i in range(start, end + 1):
if 1 <= i <= max_len:
indices.add(i - 1)
except Exception:
raise ValueError(f"Invalid range: {part}")
else:
try:
v = int(part)
if 1 <= v <= max_len:
indices.add(v - 1)
except Exception:
raise ValueError(f"Invalid selection: {part}")
return sorted(indices)
def _dict_to_result_model(d: Dict[str, Any]) -> ResultModel:
    """Coerce a piped item into a ``ResultModel``.

    Args:
        d: A ``ResultModel`` (returned unchanged), a dict-like object exposing
            ``.get``, or any other object whose fields are read as attributes.

    Returns:
        A ``ResultModel`` built from the ``title``/``name``, ``path``, ``ext``,
        ``size_bytes``, ``metadata`` and ``source`` fields. When neither
        ``title`` nor ``name`` is set, the last ``/``-separated segment of
        ``path`` is used as the title.
    """
    if isinstance(d, ResultModel):
        return d

    # Dict-like inputs keep using ``.get``; anything else is read through
    # attributes so non-dict pipeline items do not raise AttributeError.
    lookup = getattr(d, "get", None)
    if not callable(lookup):
        def lookup(key: str) -> Any:
            return getattr(d, key, None)

    path = lookup("path")
    # NOTE(review): only "/" is treated as a separator, so a Windows-style
    # path yields the whole string as the title - confirm whether intended.
    title = lookup("title") or lookup("name") or (path and str(path).split("/")[-1])

    return ResultModel(
        title=str(title) if title is not None else "",
        path=path,
        ext=lookup("ext"),
        size_bytes=lookup("size_bytes"),
        metadata=lookup("metadata") or {},
        source=lookup("source") or None,
    )
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Execute the ``select`` cmdlet.

    Renders the piped items as a table, resolves the selection (from
    ``-select`` or an ``-interactive`` prompt), emits every selected item
    through the pipeline context and, when ``-run-cmd`` is given, invokes
    that cmdlet once per selected item.

    Args:
        result: Piped input; normalised with ``normalize_result_input``.
        args: Raw cmdlet arguments, parsed against ``CMDLET``.
        config: Configuration mapping forwarded to the follow-up cmdlet.

    Returns:
        ``0`` on success, ``1`` on any failure handled here, or the last
        non-zero value returned by the follow-up cmdlet.
    """
    parsed = parse_cmdlet_args(args, CMDLET)
    select_raw = parsed.get("select")
    # NOTE(review): -multi only changes the prompt wording below; multiple
    # selections are accepted with or without it. Confirm whether it should
    # be enforced.
    allow_multi = bool(parsed.get("multi", False))
    interactive = bool(parsed.get("interactive", False))
    run_cmd = parsed.get("run-cmd")

    inputs = normalize_result_input(result)
    if not inputs:
        log("No input provided to select; pipe provider-table output or use a cmdlet that emits items.", file=sys.stderr)
        return 1

    rows = [_dict_to_result_model(item) for item in inputs]
    provider = _detect_provider(inputs)
    cols = _build_columns(provider, rows)

    # Always show the table first so the user can see what the indices mean.
    try:
        _render_rows(rows, cols)
    except Exception as exc:
        log(f"Rendering failed: {exc}", file=sys.stderr)
        return 1

    # Determine selection indices: -select wins over -interactive.
    indices: List[int] = []
    if select_raw:
        try:
            indices = _parse_selection(str(select_raw), len(rows))
        except ValueError as exc:
            log(str(exc), file=sys.stderr)
            return 1
    elif interactive:
        try:
            indices = _prompt_for_selection(len(rows), allow_multi)
        except Exception as exc:
            log(f"Interactive selection failed: {exc}", file=sys.stderr)
            return 1
    else:
        log("No selection requested. Use -select or -interactive.", file=sys.stderr)
        return 1

    if not indices:
        log("No valid selection indices provided", file=sys.stderr)
        return 1

    # Build the selected items; a single bad item is reported and skipped
    # rather than aborting the whole selection.
    selected_items: List[Dict[str, Any]] = []
    for idx in indices:
        try:
            selected = _item_to_dict(inputs[idx])
            if not selected.get("_selection_args"):
                selected["_selection_args"] = _default_selection_args(
                    selected, provider, rows[idx]
                )
            selected_items.append(selected)
        except Exception as exc:
            log(f"Skipping selected item {idx + 1}: {exc}", file=sys.stderr)
            continue

    # Emit selected items so downstream cmdlets can consume them. Emission
    # stays best-effort, but a failure is now reported instead of hidden.
    try:
        for itm in selected_items:
            ctx.emit(itm)
    except Exception as exc:
        log(f"Failed to emit selected item(s): {exc}", file=sys.stderr)

    if run_cmd:
        try:
            return _invoke_follow_up(run_cmd, selected_items, config)
        except Exception as exc:
            log(f"Failed to invoke follow-up cmdlet: {exc}", file=sys.stderr)
            return 1
    return 0


def _detect_provider(inputs: Sequence[Any]) -> Any:
    """Return the provider for the first item's ``source``, or ``None``."""
    first = inputs[0]
    first_src = first.get("source") if isinstance(first, dict) else None
    if not first_src:
        return None
    try:
        return get_provider(first_src)
    except Exception:
        # A failed lookup just means the default columns are used.
        return None


def _build_columns(provider: Any, rows: List[ResultModel]) -> List[Any]:
    """Return the provider's column spec, or minimal title/ext columns."""
    if provider:
        return provider.get_columns(rows)
    from SYS.result_table_api import title_column, ext_column

    cols = [title_column()]
    if any(r.ext for r in rows):
        cols.append(ext_column())
    return cols


def _render_rows(rows: List[ResultModel], cols: List[Any]) -> None:
    """Print the rows as a Rich table, falling back to plain text lines."""
    table = RichRenderer().render(rows, cols, None)
    try:
        from rich.console import Console

        Console().print(table)
    except Exception:
        for r in rows:
            print(" ".join(str((c.extractor(r) or "")) for c in cols))


def _prompt_for_selection(row_count: int, allow_multi: bool) -> List[int]:
    """Ask the user for a selection string and parse it into indices."""
    from rich.prompt import Prompt

    prompt_text = "Select item(s) (e.g., 1 or 1,3-5)"
    if not allow_multi:
        prompt_text += " (single)"
    choice = Prompt.ask(prompt_text).strip()
    return _parse_selection(choice, row_count)


def _item_to_dict(raw: Any) -> Dict[str, Any]:
    """Convert one piped item into a fresh dict suitable for emitting."""
    if isinstance(raw, dict):
        return dict(raw)
    if isinstance(raw, ResultModel):
        return {
            "title": raw.title,
            "path": raw.path,
            "ext": raw.ext,
            "size_bytes": raw.size_bytes,
            "metadata": raw.metadata or {},
            "source": raw.source,
        }
    try:
        return raw.to_dict()
    except Exception:
        # No usable to_dict(): keep at least a title for the item.
        return {"title": getattr(raw, "title", str(raw))}


def _default_selection_args(selected: Dict[str, Any], provider: Any, row: ResultModel) -> List[Any]:
    """Return selection args from the provider, else ``-path``/``-title``."""
    if provider:
        try:
            return provider.selection_args(row)
        except Exception:
            return []
    if selected.get("path"):
        return ["-path", selected.get("path")]
    return ["-title", selected.get("title") or ""]


def _invoke_follow_up(run_cmd: str, selected_items: List[Dict[str, Any]], config: Dict[str, Any]) -> int:
    """Run ``run_cmd`` once per selected item and return the exit code."""
    from cmdlet import ensure_cmdlet_modules_loaded, get as get_cmdlet

    ensure_cmdlet_modules_loaded()
    cmd_fn = get_cmdlet(run_cmd)
    if not cmd_fn:
        log(f"Follow-up cmdlet not found: {run_cmd}", file=sys.stderr)
        return 1

    exit_code = 0
    for itm in selected_items:
        sel_args = itm.get("_selection_args") or []
        # The selected item is passed as the piped input of the follow-up.
        try:
            ret = cmd_fn(itm, sel_args, config or {})
        except Exception as exc:
            log(f"Follow-up cmdlet raised: {exc}", file=sys.stderr)
            ret = 1
        if ret != 0:
            exit_code = ret
    return exit_code
# Attach the handler to the cmdlet definition and call its register() hook at
# import time (presumably adds it to the cmdlet registry - confirm in _shared).
CMDLET.exec = _run
CMDLET.register()

View File

@@ -0,0 +1,22 @@
Selector & provider-table usage
This project provides a small provider/table/selector flow that allows providers
and cmdlets to interact via a simple, pipable API.
Key ideas
- `provider-table` renders a provider result set and *emits* pipeline-friendly dicts for each row. Each emitted item includes `_selection_args`, a list of args the provider suggests for selecting that row (e.g., `['-path', '/tmp/file']`).
- `select` accepts piped items, displays a table (Rich-based), and supports selecting rows either via `-select` or via an `-interactive` prompt. Selected items are emitted for downstream cmdlets; in addition, `-run-cmd` invokes another cmdlet once for each selected item.
Example (non-interactive):
provider-table -provider example -sample | select -select 1 | add-file -store default
What providers must implement
- An adapter that yields `ResultModel` objects (note: this is a breaking change to the provider API).
- Optionally supply a `columns` factory and `selection_fn` (see `Provider/example_provider.py`).
Implementation notes
- `provider-table` emits dicts like `{ 'title': ..., 'path': ..., 'metadata': ..., '_selection_args': [...] }`.
- `select` will prefer `_selection_args` if present; otherwise it will fall back to provider selection logic or sensible defaults (`-path` or `-title`).
This design keeps the selector-focused UX small and predictable while enabling full cmdlet interoperability via piping and `-run-cmd`.

View File

@@ -3,6 +3,8 @@ typer>=0.9.0
rich>=13.7.0
prompt-toolkit>=3.0.0
textual>=0.30.0
pip-system-certs
# Media processing and downloading
yt-dlp[default]>=2023.11.0