jkj
This commit is contained in:
203
tool/playwright.py
Normal file
203
tool/playwright.py
Normal file
@@ -0,0 +1,203 @@
|
||||
from __future__ import annotations

from contextlib import contextmanager
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional

from SYS.logger import debug
|
||||
|
||||
# Playwright is imported defensively: when the import fails for any reason the
# module still loads, HAS_PLAYWRIGHT is False, the failure is kept in
# _PLAYWRIGHT_IMPORT_ERROR, and stand-ins are bound for the two imported names.
_PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
try:
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
    from playwright.sync_api import sync_playwright
except Exception as import_failure:  # pragma: no cover
    _PLAYWRIGHT_IMPORT_ERROR = import_failure
    HAS_PLAYWRIGHT = False
    # Keep `except PlaywrightTimeoutError` usable by aliasing the builtin.
    PlaywrightTimeoutError = TimeoutError  # type: ignore
    sync_playwright = None  # type: ignore
else:
    HAS_PLAYWRIGHT = True
|
||||
|
||||
|
||||
# Public names of this module. PlaywrightTimeoutError is re-exported for
# consumers (e.g. cmdlets catching navigation timeouts); it is the builtin
# TimeoutError when the playwright import failed.
__all__ = ["HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults"]
|
||||
|
||||
|
||||
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
|
||||
cur: Any = config
|
||||
for key in path:
|
||||
if not isinstance(cur, dict):
|
||||
return None
|
||||
cur = cur.get(key)
|
||||
return cur
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PlaywrightDefaults:
|
||||
browser: str = "chromium" # chromium|firefox|webkit
|
||||
headless: bool = True
|
||||
user_agent: str = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
viewport_width: int = 1280
|
||||
viewport_height: int = 1200
|
||||
navigation_timeout_ms: int = 90_000
|
||||
ignore_https_errors: bool = True
|
||||
|
||||
|
||||
class PlaywrightTool:
    """Small wrapper to standardize Playwright defaults and lifecycle.

    This is meant to keep cmdlets/providers from duplicating:
    - sync_playwright start/stop
    - browser launch/context creation
    - user-agent/viewport defaults

    Config overrides (top-level keys):
    - playwright.browser="chromium"
    - playwright.headless=true
    - playwright.user_agent="..."
    - playwright.viewport_width=1280
    - playwright.viewport_height=1200
    - playwright.navigation_timeout_ms=90000
    - playwright.ignore_https_errors=true

    Each setting is looked up in this order, the first non-None value wins:
    1. config["tool"]["playwright"][name]
    2. config["playwright"][name]
    3. config["playwright_<name>"]
    """

    # Browser engines accepted for the ``browser`` setting.
    _BROWSERS = frozenset({"chromium", "firefox", "webkit"})
    # String spellings (after strip/lower) that mean "off" for boolean settings.
    _FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Copy ``config`` (shallow) and resolve the effective defaults from it."""
        self._config: Dict[str, Any] = dict(config or {})
        self.defaults = self._load_defaults()

    def _load_defaults(self) -> PlaywrightDefaults:
        """Build a PlaywrightDefaults from the config, falling back per field.

        Values that cannot be interpreted (unknown browser name, non-numeric
        size or timeout) fall back to the built-in default for that field.
        """
        cfg = self._config
        tool_block = _get_nested(cfg, "tool", "playwright")
        if not isinstance(tool_block, dict):
            tool_block = {}
        pw_block = cfg.get("playwright")
        if not isinstance(pw_block, dict):
            pw_block = {}

        # PlaywrightDefaults uses slots=True, so its defaults are not visible
        # as class attributes (those are slot descriptors). Read them from an
        # instance instead.
        base = PlaywrightDefaults()

        def _get(name: str, fallback: Any) -> Any:
            candidates = (
                tool_block.get(name),
                pw_block.get(name),
                cfg.get(f"playwright_{name}"),
            )
            for candidate in candidates:
                if candidate is not None:
                    return candidate
            return fallback

        def _bool(name: str, fallback: bool) -> bool:
            raw = _get(name, fallback)
            if isinstance(raw, str):
                # bool("false") is True, so textual config values need parsing.
                return raw.strip().lower() not in self._FALSE_STRINGS
            return bool(raw)

        def _int(name: str, fallback: int) -> int:
            try:
                return int(_get(name, fallback))
            except (TypeError, ValueError, OverflowError):
                return fallback

        browser = str(_get("browser", base.browser)).strip().lower()
        if browser not in self._BROWSERS:
            browser = "chromium"

        return PlaywrightDefaults(
            browser=browser,
            headless=_bool("headless", base.headless),
            user_agent=str(_get("user_agent", base.user_agent)),
            viewport_width=_int("viewport_width", base.viewport_width),
            viewport_height=_int("viewport_height", base.viewport_height),
            navigation_timeout_ms=_int("navigation_timeout_ms", base.navigation_timeout_ms),
            ignore_https_errors=_bool("ignore_https_errors", base.ignore_https_errors),
        )

    def require(self) -> None:
        """Raise RuntimeError with install instructions if playwright is unavailable."""
        if HAS_PLAYWRIGHT and sync_playwright is not None:
            return
        detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
        raise RuntimeError(
            "playwright is required; install with: pip install playwright; then: playwright install\n"
            f"detail: {detail}"
        )

    @contextmanager
    def open_page(
        self,
        *,
        headless: Optional[bool] = None,
        user_agent: Optional[str] = None,
        viewport_width: Optional[int] = None,
        viewport_height: Optional[int] = None,
        ignore_https_errors: Optional[bool] = None,
    ) -> Iterator[Any]:
        """Context manager yielding a Playwright page with sane defaults.

        Every keyword argument overrides the matching field of
        ``self.defaults`` for this call only; ``None`` means "use the default".
        The browser engine always comes from ``self.defaults.browser``.

        On exit (normal or via exception) the context, the browser and the
        Playwright driver are closed in that order; errors raised while
        closing are ignored so they cannot mask an error from the ``with``
        body.

        Raises:
            RuntimeError: if playwright could not be imported (see ``require``).
        """
        self.require()

        settings = self.defaults
        h = settings.headless if headless is None else bool(headless)
        ua = settings.user_agent if user_agent is None else str(user_agent)
        vw = settings.viewport_width if viewport_width is None else int(viewport_width)
        vh = settings.viewport_height if viewport_height is None else int(viewport_height)
        ihe = settings.ignore_https_errors if ignore_https_errors is None else bool(ignore_https_errors)

        pw = None
        browser = None
        context = None
        try:
            # require() above guarantees sync_playwright is not None here.
            pw = sync_playwright().start()

            # Fall back to Chromium if the driver has no attribute for the
            # configured engine name.
            browser_type = getattr(pw, settings.browser, None)
            if browser_type is None:
                browser_type = pw.chromium

            # NOTE(review): this is a Chromium (Blink) switch but it is passed
            # to whichever engine is configured - confirm firefox/webkit
            # tolerate it.
            browser = browser_type.launch(
                headless=h,
                args=["--disable-blink-features=AutomationControlled"],
            )
            context = browser.new_context(
                user_agent=ua,
                viewport={"width": vw, "height": vh},
                ignore_https_errors=ihe,
            )
            yield context.new_page()
        finally:
            # Tear down innermost-first; each step is best-effort so that one
            # failing close does not prevent the remaining ones.
            for resource, closer in ((context, "close"), (browser, "close"), (pw, "stop")):
                if resource is None:
                    continue
                try:
                    getattr(resource, closer)()
                except Exception:
                    pass

    def goto(self, page: Any, url: str) -> None:
        """Navigate ``page`` to ``url`` using the configured navigation timeout.

        Waits until the ``domcontentloaded`` event. Any exception raised by
        ``page.goto`` propagates to the caller unchanged.
        """
        page.goto(url, timeout=int(self.defaults.navigation_timeout_ms), wait_until="domcontentloaded")

    def debug_dump(self) -> None:
        """Log the effective settings via ``debug``; never raises."""
        try:
            debug(
                f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
                f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
                f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
            )
        except Exception:
            # Diagnostics are best-effort and must not break the caller.
            pass
|
||||
Reference in New Issue
Block a user