This commit is contained in:
nose
2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions

11
tool/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
"""Tool helpers.
This package contains wrappers around external tools (e.g. yt-dlp) so cmdlets can share
common defaults (cookies, timeouts, format selectors) and users can override them via
`config.conf`.
"""
from .ytdlp import YtDlpTool, YtDlpDefaults
from .playwright import PlaywrightTool, PlaywrightDefaults
__all__ = ["YtDlpTool", "YtDlpDefaults", "PlaywrightTool", "PlaywrightDefaults"]

203
tool/playwright.py Normal file
View File

@@ -0,0 +1,203 @@
from __future__ import annotations

from contextlib import contextmanager
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional

from SYS.logger import debug
# Playwright is an optional dependency: record whether it imported and keep
# stand-ins for the re-exported names so this module always imports cleanly.
try:
    from playwright.sync_api import (
        TimeoutError as PlaywrightTimeoutError,
        sync_playwright,
    )
except Exception as exc:  # pragma: no cover
    HAS_PLAYWRIGHT = False
    # Kept so PlaywrightTool.require() can report why the import failed.
    _PLAYWRIGHT_IMPORT_ERROR = exc
    # Fall back to the builtin so `except PlaywrightTimeoutError` stays valid.
    PlaywrightTimeoutError = TimeoutError  # type: ignore
    sync_playwright = None  # type: ignore
else:
    HAS_PLAYWRIGHT = True
    _PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
# Re-export for consumers (e.g. cmdlets catching navigation timeouts)
__all__ = ["HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults"]
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
cur: Any = config
for key in path:
if not isinstance(cur, dict):
return None
cur = cur.get(key)
return cur
@dataclass
class PlaywrightDefaults:
    """Default Playwright launch/context settings.

    Deliberately not declared with ``slots=True``: a slotted dataclass
    replaces its class-level defaults with slot descriptors, so an expression
    such as ``PlaywrightDefaults.browser`` would yield a descriptor object
    instead of ``"chromium"``. Keeping a regular dataclass leaves the defaults
    readable both on the class and on instances.
    """

    browser: str = "chromium"  # chromium|firefox|webkit
    headless: bool = True
    user_agent: str = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    viewport_width: int = 1280
    viewport_height: int = 1200
    navigation_timeout_ms: int = 90_000
    ignore_https_errors: bool = True
class PlaywrightTool:
    """Small wrapper to standardize Playwright defaults and lifecycle.

    This is meant to keep cmdlets/providers from duplicating:
    - sync_playwright start/stop
    - browser launch/context creation
    - user-agent/viewport defaults

    Config overrides (top-level keys):
    - playwright.browser="chromium"
    - playwright.headless=true
    - playwright.user_agent="..."
    - playwright.viewport_width=1280
    - playwright.viewport_height=1200
    - playwright.navigation_timeout_ms=90000
    - playwright.ignore_https_errors=true

    Each setting is looked up, in order, in ``config["tool"]["playwright"]``,
    ``config["playwright"]`` and the flat key ``config["playwright_<name>"]``;
    the first value that is not ``None`` wins.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Copy ``config`` (shallow) and resolve the effective defaults once."""
        self._config: Dict[str, Any] = dict(config or {})
        self.defaults = self._load_defaults()

    @staticmethod
    def _as_bool(value: Any) -> bool:
        """Coerce a config value to bool, honouring textual spellings.

        ``bool("false")`` is ``True``, so strings are compared against the
        usual "off" spellings instead of relying on truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in {"", "0", "false", "no", "off"}
        return bool(value)

    def _load_defaults(self) -> "PlaywrightDefaults":
        """Merge user configuration over the built-in defaults.

        Returns:
            A ``PlaywrightDefaults`` whose fields are always of the declared
            types; invalid or missing values fall back to the built-ins.
        """
        cfg = self._config
        tool_block = _get_nested(cfg, "tool", "playwright")
        if not isinstance(tool_block, dict):
            tool_block = {}
        pw_block = cfg.get("playwright")
        if not isinstance(pw_block, dict):
            pw_block = {}

        # Take fallbacks from an instance: a slotted dataclass exposes slot
        # descriptors, not default values, as class attributes.
        base = PlaywrightDefaults()

        def _get(name: str, fallback: Any) -> Any:
            candidates = (
                tool_block.get(name),
                pw_block.get(name),
                cfg.get(f"playwright_{name}"),
            )
            for candidate in candidates:
                if candidate is not None:
                    return candidate
            return fallback

        def _int(name: str, fallback: int) -> int:
            try:
                return int(_get(name, fallback))
            except (TypeError, ValueError, OverflowError):
                return fallback

        browser = str(_get("browser", base.browser)).strip().lower()
        if browser not in {"chromium", "firefox", "webkit"}:
            browser = "chromium"

        return PlaywrightDefaults(
            browser=browser,
            headless=self._as_bool(_get("headless", base.headless)),
            user_agent=str(_get("user_agent", base.user_agent)),
            viewport_width=_int("viewport_width", base.viewport_width),
            viewport_height=_int("viewport_height", base.viewport_height),
            navigation_timeout_ms=_int("navigation_timeout_ms", base.navigation_timeout_ms),
            ignore_https_errors=self._as_bool(_get("ignore_https_errors", base.ignore_https_errors)),
        )

    def require(self) -> None:
        """Raise ``RuntimeError`` with install instructions if Playwright is unavailable."""
        if HAS_PLAYWRIGHT and sync_playwright is not None:
            return
        detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
        raise RuntimeError(
            "playwright is required; install with: pip install playwright; then: playwright install\n"
            f"detail: {detail}"
        )

    @contextmanager
    def open_page(
        self,
        *,
        headless: Optional[bool] = None,
        user_agent: Optional[str] = None,
        viewport_width: Optional[int] = None,
        viewport_height: Optional[int] = None,
        ignore_https_errors: Optional[bool] = None,
    ) -> Iterator[Any]:
        """Context manager yielding a Playwright page with sane defaults.

        Every keyword overrides the matching configured default when it is
        not ``None``. The context, browser and Playwright driver are torn
        down when the ``with`` block exits, even on error.

        Raises:
            RuntimeError: if Playwright is not installed (see ``require``).
        """
        self.require()
        h = self.defaults.headless if headless is None else bool(headless)
        ua = self.defaults.user_agent if user_agent is None else str(user_agent)
        vw = self.defaults.viewport_width if viewport_width is None else int(viewport_width)
        vh = self.defaults.viewport_height if viewport_height is None else int(viewport_height)
        ihe = self.defaults.ignore_https_errors if ignore_https_errors is None else bool(ignore_https_errors)

        pw = None
        browser = None
        context = None
        try:
            # require() already validated this; the assert only narrows the type.
            assert sync_playwright is not None
            pw = sync_playwright().start()
            browser_type = getattr(pw, self.defaults.browser, None)
            if browser_type is None:
                browser_type = pw.chromium
            # NOTE(review): this is a Chromium/Blink switch but it is passed to
            # every engine - confirm Firefox/WebKit tolerate it.
            browser = browser_type.launch(
                headless=h,
                args=["--disable-blink-features=AutomationControlled"],
            )
            context = browser.new_context(
                user_agent=ua,
                viewport={"width": vw, "height": vh},
                ignore_https_errors=ihe,
            )
            page = context.new_page()
            yield page
        finally:
            # Best-effort teardown in reverse order of creation; a failure to
            # close one resource must not prevent closing the others.
            for resource, closer in ((context, "close"), (browser, "close"), (pw, "stop")):
                if resource is None:
                    continue
                try:
                    getattr(resource, closer)()
                except Exception:
                    pass

    def goto(self, page: Any, url: str) -> None:
        """Navigate with configured timeout.

        Waits for ``domcontentloaded``; any exception from ``page.goto``
        propagates to the caller unchanged.
        """
        page.goto(url, timeout=int(self.defaults.navigation_timeout_ms), wait_until="domcontentloaded")

    def debug_dump(self) -> None:
        """Log the effective settings; never raises (diagnostics only)."""
        try:
            debug(
                f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
                f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
                f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
            )
        except Exception:
            pass

195
tool/ytdlp.py Normal file
View File

@@ -0,0 +1,195 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from SYS.logger import debug
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
cur: Any = config
for key in path:
if not isinstance(cur, dict):
return None
cur = cur.get(key)
return cur
def _parse_csv_list(value: Any) -> Optional[List[str]]:
if value is None:
return None
if isinstance(value, list):
out: List[str] = []
for item in value:
s = str(item).strip()
if s:
out.append(s)
return out or None
s = str(value).strip()
if not s:
return None
# allow either JSON-ish list strings or simple comma-separated values
if s.startswith("[") and s.endswith("]"):
s = s[1:-1]
parts = [p.strip() for p in s.split(",")]
parts = [p for p in parts if p]
return parts or None
@dataclass
class YtDlpDefaults:
    """User-tunable defaults for yt-dlp behavior.

    Recommended config.conf keys (top-level dotted keys):
    - ytdlp.video_format="bestvideo+bestaudio/best"
    - ytdlp.audio_format="251/140/bestaudio"
    - ytdlp.format_sort="res:2160,res:1440,res:1080,res:720,res"

    Cookies:
    - cookies="C:\\path\\cookies.txt" (already supported by config.resolve_cookies_path)

    Deliberately not declared with ``slots=True``: a slotted dataclass
    replaces its class-level defaults with slot descriptors, so
    ``YtDlpDefaults.video_format`` would no longer evaluate to the default
    format string.
    """

    video_format: str = "bestvideo+bestaudio/best"
    audio_format: str = "251/140/bestaudio"
    format_sort: Optional[List[str]] = None
class YtDlpTool:
    """Centralizes yt-dlp defaults and translation helpers.

    This is intentionally small and dependency-light so cmdlets can use it without
    forcing a full refactor.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None, *, script_dir: Optional[Path] = None) -> None:
        """Copy ``config`` (shallow), then resolve defaults and the cookie file once.

        Args:
            config: Application configuration mapping; ``None`` means empty.
            script_dir: Application root handed to ``resolve_cookies_path``.
        """
        self._config: Dict[str, Any] = dict(config or {})
        # `resolve_cookies_path` expects the app root so it can fall back to ./cookies.txt.
        # This file lives under ./tool/, so default to the parent directory.
        self._script_dir = script_dir or Path(__file__).resolve().parent.parent
        self.defaults = self._load_defaults()
        self._cookiefile: Optional[Path] = self._init_cookiefile()

    def _init_cookiefile(self) -> Optional[Path]:
        """Resolve cookies once at tool init (yt-dlp is the primary consumer).

        Returns the resolved path only when it names an existing file. Any
        failure (including the config module being unavailable) is treated as
        "no cookies": cookies are optional, so this must never block startup.
        """
        try:
            from config import resolve_cookies_path

            resolved = resolve_cookies_path(self._config, script_dir=self._script_dir)
            if resolved is not None and resolved.is_file():
                return resolved
        except Exception:
            pass
        return None

    def _load_defaults(self) -> "YtDlpDefaults":
        """Merge user configuration over the built-in yt-dlp defaults.

        For each setting the first truthy candidate wins; the nested
        ``ytdlp.format.video`` / ``ytdlp.format.audio`` forms take precedence
        over every other spelling.
        """
        cfg = self._config
        tool_block = _get_nested(cfg, "tool", "ytdlp")
        if not isinstance(tool_block, dict):
            tool_block = {}
        ytdlp_block = cfg.get("ytdlp")
        if not isinstance(ytdlp_block, dict):
            ytdlp_block = {}

        # Take fallbacks from an instance: a slotted dataclass exposes slot
        # descriptors, not default values, as class attributes.
        base = YtDlpDefaults()

        # Accept both nested and flat styles.
        video_format = (
            # Dotted keys written as nested dicts: ytdlp.format.video
            _get_nested(cfg, "ytdlp", "format", "video")
            or tool_block.get("video_format")
            or tool_block.get("format")
            or ytdlp_block.get("video_format")
            or ytdlp_block.get("video")
            or ytdlp_block.get("format_video")
            or cfg.get("ytdlp_video_format")
            or base.video_format
        )
        audio_format = (
            # Dotted keys written as nested dicts: ytdlp.format.audio
            _get_nested(cfg, "ytdlp", "format", "audio")
            or tool_block.get("audio_format")
            or ytdlp_block.get("audio_format")
            or ytdlp_block.get("audio")
            or ytdlp_block.get("format_audio")
            or cfg.get("ytdlp_audio_format")
            or base.audio_format
        )
        fmt_sort_val = (
            tool_block.get("format_sort")
            or ytdlp_block.get("format_sort")
            or ytdlp_block.get("formatSort")
            or cfg.get("ytdlp_format_sort")
            or _get_nested(cfg, "ytdlp", "format", "sort")
        )

        return YtDlpDefaults(
            video_format=str(video_format),
            audio_format=str(audio_format),
            format_sort=_parse_csv_list(fmt_sort_val),
        )

    def resolve_cookiefile(self) -> Optional[Path]:
        """Return the cookie file resolved at construction time, if any."""
        return self._cookiefile

    def default_format(self, mode: str) -> str:
        """Return the audio format for mode ``"audio"`` (case-insensitive), else the video format."""
        m = str(mode or "").lower().strip()
        if m == "audio":
            return self.defaults.audio_format
        return self.defaults.video_format

    def build_yt_dlp_cli_args(
        self,
        *,
        url: str,
        output_dir: Optional[Path] = None,
        ytdl_format: Optional[str] = None,
        playlist_items: Optional[str] = None,
        no_playlist: bool = False,
        quiet: bool = True,
        extra_args: Optional[Sequence[str]] = None,
    ) -> List[str]:
        """Build a yt-dlp command line (argv list).

        This is primarily for debug output or subprocess execution.

        Args:
            url: Target URL; always the last argv element.
            output_dir: When given, adds ``-o <dir>/%(title)s.%(ext)s``.
            ytdl_format: Format selector; blank/None omits ``--format``.
            playlist_items: Value for ``--playlist-items``.
            no_playlist: Adds ``--no-playlist``.
            quiet: Adds ``--quiet --no-warnings``.
            extra_args: Extra arguments appended verbatim (blank ones skipped).

        Returns:
            The argv list, starting with ``"yt-dlp"``.
        """
        argv: List[str] = ["yt-dlp"]
        if quiet:
            argv.extend(["--quiet", "--no-warnings"])
        argv.append("--no-progress")

        cookiefile = self.resolve_cookiefile()
        if cookiefile is not None:
            argv.extend(["--cookies", str(cookiefile)])

        if no_playlist:
            argv.append("--no-playlist")
        if playlist_items:
            argv.extend(["--playlist-items", str(playlist_items)])

        fmt = (ytdl_format or "").strip()
        if fmt:
            # Use long form to avoid confusion with app-level flags.
            argv.extend(["--format", fmt])

        # Pass the sort order as one comma-separated -S value (the documented
        # form) so the configured priority does not depend on how yt-dlp
        # merges repeated -S options.
        sort_keys = [str(key).strip() for key in (self.defaults.format_sort or [])]
        sort_keys = [key for key in sort_keys if key]
        if sort_keys:
            argv.extend(["-S", ",".join(sort_keys)])

        if output_dir is not None:
            outtmpl = str((output_dir / "%(title)s.%(ext)s").resolve())
            argv.extend(["-o", outtmpl])

        if extra_args:
            argv.extend([str(a) for a in extra_args if str(a).strip()])

        argv.append(str(url))
        return argv

    def debug_print_cli(self, argv: Sequence[str]) -> None:
        """Log ``argv`` as one space-joined line; never raises (diagnostics only)."""
        try:
            debug("yt-dlp argv: " + " ".join(str(a) for a in argv))
        except Exception:
            pass