jkj
This commit is contained in:
11
tool/__init__.py
Normal file
11
tool/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Tool helpers.
|
||||
|
||||
This package contains wrappers around external tools (e.g. yt-dlp) so cmdlets can share
|
||||
common defaults (cookies, timeouts, format selectors) and users can override them via
|
||||
`config.conf`.
|
||||
"""
|
||||
|
||||
from .ytdlp import YtDlpTool, YtDlpDefaults
|
||||
from .playwright import PlaywrightTool, PlaywrightDefaults
|
||||
|
||||
__all__ = ["YtDlpTool", "YtDlpDefaults", "PlaywrightTool", "PlaywrightDefaults"]
|
||||
203
tool/playwright.py
Normal file
203
tool/playwright.py
Normal file
@@ -0,0 +1,203 @@
|
||||
from __future__ import annotations

from contextlib import contextmanager
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional

from SYS.logger import debug
|
||||
|
||||
try:
|
||||
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
HAS_PLAYWRIGHT = True
|
||||
_PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
|
||||
except Exception as exc: # pragma: no cover
|
||||
HAS_PLAYWRIGHT = False
|
||||
_PLAYWRIGHT_IMPORT_ERROR = exc
|
||||
PlaywrightTimeoutError = TimeoutError # type: ignore
|
||||
sync_playwright = None # type: ignore
|
||||
|
||||
|
||||
# Re-export for consumers (e.g. cmdlets catching navigation timeouts)
|
||||
__all__ = ["HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults"]
|
||||
|
||||
|
||||
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
    """Follow ``path`` through nested dicts and return whatever sits at the end.

    Args:
        config: The outermost mapping to start from.
        *path: Keys to descend through, outermost first.

    Returns:
        The value reached after consuming every key. ``None`` is returned as
        soon as a non-dict is met while keys remain, and also when a key is
        absent (via ``dict.get``). With no keys, ``config`` itself is returned.
    """
    node: Any = config
    pending = list(path)
    while pending:
        # Only dicts can be descended into; anything else ends the walk.
        if not isinstance(node, dict):
            return None
        node = node.get(pending.pop(0))
    return node
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PlaywrightDefaults:
|
||||
browser: str = "chromium" # chromium|firefox|webkit
|
||||
headless: bool = True
|
||||
user_agent: str = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
viewport_width: int = 1280
|
||||
viewport_height: int = 1200
|
||||
navigation_timeout_ms: int = 90_000
|
||||
ignore_https_errors: bool = True
|
||||
|
||||
|
||||
class PlaywrightTool:
    """Small wrapper to standardize Playwright defaults and lifecycle.

    This is meant to keep cmdlets/providers from duplicating:
    - sync_playwright start/stop
    - browser launch/context creation
    - user-agent/viewport defaults

    Each setting is resolved in this order; the first non-``None`` value wins:
    1. ``config["tool"]["playwright"][<name>]``
    2. ``config["playwright"][<name>]``
    3. ``config["playwright_<name>"]``
    4. the value declared on ``PlaywrightDefaults``

    Recognised setting names:
    - browser ("chromium", "firefox" or "webkit")
    - headless
    - user_agent
    - viewport_width
    - viewport_height
    - navigation_timeout_ms
    - ignore_https_errors
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Copy ``config`` (shallow) and resolve the effective defaults once."""
        self._config: Dict[str, Any] = dict(config or {})
        self.defaults = self._load_defaults()

    @staticmethod
    def _coerce_bool(value: Any, fallback: bool) -> bool:
        """Interpret a config value as a boolean.

        Strings are matched case-insensitively against common true/false
        words, because ``bool("false")`` is ``True``. ``None`` and strings
        that match neither set yield ``fallback``; any other type goes through
        ``bool()``.
        """
        if value is None:
            return fallback
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            word = value.strip().lower()
            if word in ("1", "true", "yes", "on", "y", "t"):
                return True
            if word in ("", "0", "false", "no", "off", "n", "f", "none", "null"):
                return False
            return fallback
        return bool(value)

    def _load_defaults(self) -> PlaywrightDefaults:
        """Build the effective ``PlaywrightDefaults`` from the stored config.

        Returns:
            A ``PlaywrightDefaults`` whose fields hold the configured value
            where one is present and usable, and the declared default
            otherwise.
        """
        cfg = self._config

        # Read baseline values from an instance. PlaywrightDefaults is a
        # slotted dataclass, so ``PlaywrightDefaults.browser`` and friends are
        # slot descriptors rather than the declared defaults.
        base = PlaywrightDefaults()

        tool_block = _get_nested(cfg, "tool", "playwright")
        if not isinstance(tool_block, dict):
            tool_block = {}
        pw_block = cfg.get("playwright")
        if not isinstance(pw_block, dict):
            pw_block = {}

        def _get(name: str) -> Any:
            """Return the first non-None configured value for ``name``, else None."""
            for candidate in (
                tool_block.get(name),
                pw_block.get(name),
                cfg.get(f"playwright_{name}"),
            ):
                if candidate is not None:
                    return candidate
            return None

        def _int(name: str, fallback: int) -> int:
            """Return the configured integer for ``name``, or ``fallback`` if unusable."""
            raw = _get(name)
            if raw is None:
                return fallback
            try:
                return int(raw)
            except (TypeError, ValueError, OverflowError):
                return fallback

        browser_raw = _get("browser")
        browser = base.browser if browser_raw is None else str(browser_raw).strip().lower()
        if browser not in {"chromium", "firefox", "webkit"}:
            browser = "chromium"

        ua_raw = _get("user_agent")
        # A blank user agent is treated as "not configured".
        ua = base.user_agent if ua_raw is None else (str(ua_raw).strip() or base.user_agent)

        return PlaywrightDefaults(
            browser=browser,
            headless=self._coerce_bool(_get("headless"), base.headless),
            user_agent=ua,
            viewport_width=_int("viewport_width", base.viewport_width),
            viewport_height=_int("viewport_height", base.viewport_height),
            navigation_timeout_ms=_int("navigation_timeout_ms", base.navigation_timeout_ms),
            ignore_https_errors=self._coerce_bool(
                _get("ignore_https_errors"), base.ignore_https_errors
            ),
        )

    def require(self) -> None:
        """Ensure Playwright was importable.

        Raises:
            RuntimeError: If the import failed; the message carries install
                instructions and the original import error text.
        """
        if HAS_PLAYWRIGHT and sync_playwright is not None:
            return
        detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
        raise RuntimeError(
            "playwright is required; install with: pip install playwright; then: playwright install\n"
            f"detail: {detail}"
        )

    @contextmanager
    def open_page(
        self,
        *,
        headless: Optional[bool] = None,
        user_agent: Optional[str] = None,
        viewport_width: Optional[int] = None,
        viewport_height: Optional[int] = None,
        ignore_https_errors: Optional[bool] = None,
    ) -> Iterator[Any]:
        """Context manager yielding a Playwright page with sane defaults.

        Every argument left as ``None`` falls back to the matching field of
        ``self.defaults``. Usage: ``with tool.open_page() as page: ...``.

        Yields:
            A new page in a fresh browser context.

        Raises:
            RuntimeError: On entry, if Playwright is not available.
        """
        self.require()

        h = self.defaults.headless if headless is None else bool(headless)
        ua = self.defaults.user_agent if user_agent is None else str(user_agent)
        vw = self.defaults.viewport_width if viewport_width is None else int(viewport_width)
        vh = self.defaults.viewport_height if viewport_height is None else int(viewport_height)
        ihe = (
            self.defaults.ignore_https_errors
            if ignore_https_errors is None
            else bool(ignore_https_errors)
        )

        pw = None
        browser = None
        context = None
        try:
            pw = sync_playwright().start()

            engine = self.defaults.browser
            browser_type = getattr(pw, engine, None)
            if browser_type is None:
                engine = "chromium"
                browser_type = pw.chromium

            # Blink is Chromium's rendering engine, so the flag is only passed
            # when Chromium is the engine being launched.
            launch_args = (
                ["--disable-blink-features=AutomationControlled"]
                if engine == "chromium"
                else []
            )
            browser = browser_type.launch(headless=h, args=launch_args)
            context = browser.new_context(
                user_agent=ua,
                viewport={"width": vw, "height": vh},
                ignore_https_errors=ihe,
            )
            yield context.new_page()
        finally:
            # Tear down in reverse order of creation. Each step is best-effort
            # so that one failure does not stop the remaining resources from
            # being released.
            for resource, closer in ((context, "close"), (browser, "close"), (pw, "stop")):
                if resource is None:
                    continue
                try:
                    getattr(resource, closer)()
                except Exception:
                    pass

    def goto(self, page: Any, url: str) -> None:
        """Navigate with configured timeout.

        Calls ``page.goto`` with ``wait_until="domcontentloaded"`` and the
        configured ``navigation_timeout_ms``. Exceptions from ``page.goto``
        propagate unchanged.
        """
        page.goto(
            url,
            timeout=int(self.defaults.navigation_timeout_ms),
            wait_until="domcontentloaded",
        )

    def debug_dump(self) -> None:
        """Log the effective settings via ``debug``; never raises."""
        try:
            debug(
                f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
                f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
                f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
            )
        except Exception:
            # Diagnostics must not break the caller.
            pass
|
||||
195
tool/ytdlp.py
Normal file
195
tool/ytdlp.py
Normal file
@@ -0,0 +1,195 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
|
||||
from SYS.logger import debug
|
||||
|
||||
|
||||
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
    """Return the value found by descending ``config`` one key at a time.

    Args:
        config: The outermost mapping to start from.
        *path: Keys to follow, outermost first.

    Returns:
        The value at the end of ``path``. ``None`` is returned when a
        non-dict is encountered before the path is exhausted, or when a key
        is missing. With an empty path, ``config`` is returned as-is.
    """
    node: Any = config
    for segment in path:
        if isinstance(node, dict):
            node = node.get(segment)
        else:
            # Cannot descend into a non-dict while keys remain.
            return None
    return node
|
||||
|
||||
|
||||
def _parse_csv_list(value: Any) -> Optional[List[str]]:
    """Normalize a config value into a list of non-empty strings.

    Accepted inputs:
    - ``None``: returns ``None``.
    - a list or tuple: each item is stringified and stripped.
    - anything else: stringified and split on commas. One pair of enclosing
      square brackets is removed first, and quotes around each element are
      dropped, so ``'["a", "b"]'`` and ``"a,b"`` give the same result.

    Returns:
        The cleaned items in their original order, or ``None`` when nothing
        non-empty remains.
    """
    if value is None:
        return None

    if isinstance(value, (list, tuple)):
        items = [str(item).strip() for item in value]
    else:
        text = str(value).strip()
        # allow either JSON-ish list strings or simple comma-separated values
        if text.startswith("[") and text.endswith("]"):
            text = text[1:-1]
        # Remove the quotes a JSON-style list carries around each element;
        # otherwise they would end up inside the returned values.
        items = [part.strip().strip("\"'").strip() for part in text.split(",")]

    cleaned = [item for item in items if item]
    return cleaned or None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class YtDlpDefaults:
|
||||
"""User-tunable defaults for yt-dlp behavior.
|
||||
|
||||
Recommended config.conf keys (top-level dotted keys):
|
||||
- ytdlp.video_format="bestvideo+bestaudio/best"
|
||||
- ytdlp.audio_format="251/140/bestaudio"
|
||||
- ytdlp.format_sort="res:2160,res:1440,res:1080,res:720,res"
|
||||
|
||||
Cookies:
|
||||
- cookies="C:\\path\\cookies.txt" (already supported by config.resolve_cookies_path)
|
||||
"""
|
||||
|
||||
video_format: str = "bestvideo+bestaudio/best"
|
||||
audio_format: str = "251/140/bestaudio"
|
||||
format_sort: Optional[List[str]] = None
|
||||
|
||||
|
||||
class YtDlpTool:
    """Centralizes yt-dlp defaults and translation helpers.

    This is intentionally small and dependency-light so cmdlets can use it without
    forcing a full refactor.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None, *, script_dir: Optional[Path] = None) -> None:
        """Copy ``config`` (shallow), then resolve defaults and the cookie file once.

        Args:
            config: Application configuration mapping; ``None`` means empty.
            script_dir: Directory handed to ``resolve_cookies_path``. Defaults
                to the parent of the directory containing this file.
        """
        self._config: Dict[str, Any] = dict(config or {})
        # `resolve_cookies_path` expects the app root so it can fall back to ./cookies.txt.
        # This file lives under ./tool/, so default to the parent directory.
        self._script_dir = script_dir or Path(__file__).resolve().parent.parent
        self.defaults = self._load_defaults()
        self._cookiefile: Optional[Path] = self._init_cookiefile()

    def _init_cookiefile(self) -> Optional[Path]:
        """Resolve cookies once at tool init (yt-dlp is the primary consumer).

        Returns:
            The resolved path when it points at an existing file, else ``None``.
            Any failure while importing or calling the resolver also yields
            ``None``: cookies are optional, so this is deliberately best-effort.
        """
        try:
            from config import resolve_cookies_path

            resolved = resolve_cookies_path(self._config, script_dir=self._script_dir)
            if resolved is not None and resolved.is_file():
                return resolved
        except Exception:
            pass
        return None

    def _load_defaults(self) -> YtDlpDefaults:
        """Build the effective ``YtDlpDefaults`` from the stored config.

        Both nested and flat key styles are accepted. For each setting the
        first truthy candidate wins, with ``ytdlp.format.video`` /
        ``ytdlp.format.audio`` taking precedence over every other spelling.
        """
        cfg = self._config

        # Read baseline values from an instance. YtDlpDefaults is a slotted
        # dataclass, so ``YtDlpDefaults.video_format`` is a slot descriptor
        # rather than the declared default string.
        base = YtDlpDefaults()

        tool_block = _get_nested(cfg, "tool", "ytdlp")
        if not isinstance(tool_block, dict):
            tool_block = {}
        ytdlp_block = cfg.get("ytdlp")
        if not isinstance(ytdlp_block, dict):
            ytdlp_block = {}

        def _first(*candidates: Any) -> Any:
            """Return the first truthy candidate, or None if there is none."""
            return next((c for c in candidates if c), None)

        video_format = _first(
            _get_nested(cfg, "ytdlp", "format", "video"),
            tool_block.get("video_format"),
            tool_block.get("format"),
            ytdlp_block.get("video_format"),
            ytdlp_block.get("video"),
            ytdlp_block.get("format_video"),
            cfg.get("ytdlp_video_format"),
        )
        audio_format = _first(
            _get_nested(cfg, "ytdlp", "format", "audio"),
            tool_block.get("audio_format"),
            ytdlp_block.get("audio_format"),
            ytdlp_block.get("audio"),
            ytdlp_block.get("format_audio"),
            cfg.get("ytdlp_audio_format"),
        )
        fmt_sort_val = _first(
            tool_block.get("format_sort"),
            ytdlp_block.get("format_sort"),
            ytdlp_block.get("formatSort"),
            cfg.get("ytdlp_format_sort"),
            _get_nested(cfg, "ytdlp", "format", "sort"),
        )

        return YtDlpDefaults(
            video_format=str(video_format) if video_format else base.video_format,
            audio_format=str(audio_format) if audio_format else base.audio_format,
            format_sort=_parse_csv_list(fmt_sort_val),
        )

    def resolve_cookiefile(self) -> Optional[Path]:
        """Return the cookie file resolved at construction time, if any."""
        return self._cookiefile

    def default_format(self, mode: str) -> str:
        """Return the default format selector for ``mode``.

        ``"audio"`` (case-insensitive, surrounding whitespace ignored) selects
        the audio selector; every other value selects the video selector.
        """
        m = str(mode or "").lower().strip()
        if m == "audio":
            return self.defaults.audio_format
        return self.defaults.video_format

    def build_yt_dlp_cli_args(
        self,
        *,
        url: str,
        output_dir: Optional[Path] = None,
        ytdl_format: Optional[str] = None,
        playlist_items: Optional[str] = None,
        no_playlist: bool = False,
        quiet: bool = True,
        extra_args: Optional[Sequence[str]] = None,
    ) -> List[str]:
        """Build a yt-dlp command line (argv list).

        This is primarily for debug output or subprocess execution.

        Args:
            url: Target URL; always the last argv element.
            output_dir: When given, adds ``-o <output_dir>/%(title)s.%(ext)s``.
            ytdl_format: Format selector; omitted when blank.
            playlist_items: Value for ``--playlist-items``; omitted when empty.
            no_playlist: Adds ``--no-playlist``.
            quiet: Adds ``--quiet --no-warnings``.
            extra_args: Extra arguments appended before the URL; blank
                entries are dropped.

        Returns:
            The argv list, starting with ``"yt-dlp"``.
        """
        argv: List[str] = ["yt-dlp"]
        if quiet:
            argv.extend(["--quiet", "--no-warnings"])
        argv.append("--no-progress")

        cookiefile = self.resolve_cookiefile()
        if cookiefile is not None:
            argv.extend(["--cookies", str(cookiefile)])

        if no_playlist:
            argv.append("--no-playlist")
        if playlist_items:
            argv.extend(["--playlist-items", str(playlist_items)])

        fmt = (ytdl_format or "").strip()
        if fmt:
            # Use long form to avoid confusion with app-level flags.
            argv.extend(["--format", fmt])

        if self.defaults.format_sort:
            # Emit one -S with a comma-separated list. Repeating -S per key
            # does not keep the configured order, because yt-dlp gives later
            # occurrences precedence over earlier ones.
            argv.extend(["-S", ",".join(self.defaults.format_sort)])

        if output_dir is not None:
            outtmpl = str((output_dir / "%(title)s.%(ext)s").resolve())
            argv.extend(["-o", outtmpl])

        if extra_args:
            argv.extend([str(a) for a in extra_args if str(a).strip()])

        argv.append(str(url))
        return argv

    def debug_print_cli(self, argv: Sequence[str]) -> None:
        """Log ``argv`` as a space-joined string via ``debug``; never raises."""
        try:
            debug("yt-dlp argv: " + " ".join(str(a) for a in argv))
        except Exception:
            # Diagnostics must not break the caller.
            pass
|
||||
Reference in New Issue
Block a user