Files
Medios-Macina/tool/playwright.py

245 lines
8.1 KiB
Python

from __future__ import annotations
import contextlib
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional
from SYS.logger import debug
try:
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
HAS_PLAYWRIGHT = True
_PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
except Exception as exc: # pragma: no cover
HAS_PLAYWRIGHT = False
_PLAYWRIGHT_IMPORT_ERROR = exc
PlaywrightTimeoutError = TimeoutError # type: ignore
sync_playwright = None # type: ignore
# Public names of this module.
# PlaywrightTimeoutError is re-exported for consumers (e.g. cmdlets catching
# navigation timeouts) so they can import it from here. When the playwright
# import above fails, that name is bound to the builtin TimeoutError instead.
__all__ = [
"HAS_PLAYWRIGHT",
"PlaywrightTimeoutError",
"PlaywrightTool",
"PlaywrightDefaults"
]
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
    """Follow *path* through nested dicts and return the value found there.

    Args:
        config: Mapping to start from.
        *path: Keys to descend through, outermost first.

    Returns:
        The value at the end of the path. ``None`` if a key is missing or a
        non-dict value is reached while keys remain. With an empty path,
        ``config`` itself is returned unchanged.
    """
    # No keys left: whatever we are holding is the answer.
    if not path:
        return config
    # Keys remain but there is nothing to index into.
    if not isinstance(config, dict):
        return None
    head, *tail = path
    return _get_nested(config.get(head), *tail)
@dataclass(slots=True)
class PlaywrightDefaults:
browser: str = "chromium" # chromium|firefox|webkit
headless: bool = True
user_agent: str = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0.0.0 Safari/537.36"
)
viewport_width: int = 1920
viewport_height: int = 1080
navigation_timeout_ms: int = 90_000
ignore_https_errors: bool = True
class PlaywrightTool:
    """Small wrapper to standardize Playwright defaults and lifecycle.

    This is meant to keep cmdlets/providers from duplicating:
    - sync_playwright start/stop
    - browser launch/context creation
    - user-agent/viewport defaults

    Config overrides. Each setting is looked up in this order and the first
    value that is not ``None`` wins:
    1. ``tool.playwright.<name>``  (nested block)
    2. ``playwright.<name>``       (nested block)
    3. ``playwright_<name>``       (flat top-level key)

    Recognised names (shown with example values):
    - browser="chromium"
    - headless=true
    - user_agent="..."
    - viewport_width=1280
    - viewport_height=1200
    - navigation_timeout_ms=90000
    - ignore_https_errors=true

    Boolean settings also accept the strings "0", "false", "no" and "off"
    (case-insensitive) as false, so text-based config values behave as written.
    """

    # String spellings that mean "off". A plain ``bool("false")`` is True,
    # which would silently invert a text-based config value.
    _FALSE_STRINGS = frozenset({"0", "false", "no", "off"})

    # Launch switch that targets Blink (Chromium's engine); it is only passed
    # when the Chromium engine is launched.
    _CHROMIUM_LAUNCH_ARGS = ("--disable-blink-features=AutomationControlled",)

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Copy *config* (if any) and resolve the effective defaults from it."""
        self._config: Dict[str, Any] = dict(config or {})
        self.defaults = self._load_defaults()

    @staticmethod
    def _as_bool(value: Any) -> bool:
        """Coerce a config value to ``bool``, honouring textual false values.

        Strings are stripped and compared case-insensitively: "0", "false",
        "no", "off" and empty/whitespace-only strings are false, any other
        string is true. Non-string values use ordinary truthiness.
        """
        if isinstance(value, str):
            text = value.strip().lower()
            if text in PlaywrightTool._FALSE_STRINGS:
                return False
            return bool(text)
        return bool(value)

    def _load_defaults(self) -> PlaywrightDefaults:
        """Build the effective ``PlaywrightDefaults`` from the stored config.

        Invalid values never raise: an unknown browser name becomes
        "chromium" and a non-integer numeric setting falls back to the
        built-in default.
        """
        cfg = self._config
        defaults = PlaywrightDefaults()

        tool_block = _get_nested(cfg, "tool", "playwright")
        if not isinstance(tool_block, dict):
            tool_block = {}
        pw_block = cfg.get("playwright")
        if not isinstance(pw_block, dict):
            pw_block = {}

        def _get(name: str, fallback: Any) -> Any:
            # First non-None value wins; see the class docstring for the order.
            candidates = (
                tool_block.get(name),
                pw_block.get(name),
                cfg.get(f"playwright_{name}"),
            )
            for candidate in candidates:
                if candidate is not None:
                    return candidate
            return fallback

        def _int(name: str, fallback: int) -> int:
            raw = _get(name, fallback)
            try:
                return int(raw)
            except (TypeError, ValueError, OverflowError):
                return fallback

        browser = str(_get("browser", defaults.browser)).strip().lower() or "chromium"
        if browser not in {"chromium", "firefox", "webkit"}:
            browser = "chromium"

        return PlaywrightDefaults(
            browser=browser,
            headless=self._as_bool(_get("headless", defaults.headless)),
            user_agent=str(_get("user_agent", defaults.user_agent)),
            viewport_width=_int("viewport_width", defaults.viewport_width),
            viewport_height=_int("viewport_height", defaults.viewport_height),
            navigation_timeout_ms=_int(
                "navigation_timeout_ms", defaults.navigation_timeout_ms
            ),
            ignore_https_errors=self._as_bool(
                _get("ignore_https_errors", defaults.ignore_https_errors)
            ),
        )

    def require(self) -> None:
        """Raise ``RuntimeError`` unless Playwright was imported successfully."""
        if HAS_PLAYWRIGHT and sync_playwright is not None:
            return
        detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
        raise RuntimeError(
            "playwright is required; install with: pip install playwright; then: playwright install\n"
            f"detail: {detail}"
        )

    @contextlib.contextmanager
    def open_page(
        self,
        *,
        headless: Optional[bool] = None,
        user_agent: Optional[str] = None,
        viewport_width: Optional[int] = None,
        viewport_height: Optional[int] = None,
        ignore_https_errors: Optional[bool] = None,
    ) -> Iterator[Any]:
        """Context manager yielding a Playwright page with sane defaults.

        Every keyword argument overrides the matching value in
        ``self.defaults`` for this call only; ``None`` means "use the default".

        Args:
            headless: Launch the browser without a visible window.
            user_agent: User-agent for the context. An empty value, or one of
                "native", "playwright" or "default" (case-insensitive), omits
                the override so the browser's own user-agent is used.
            viewport_width: Viewport width in pixels.
            viewport_height: Viewport height in pixels.
            ignore_https_errors: Ignore HTTPS errors in the context.

        Yields:
            A new page in a fresh browser context.

        Raises:
            RuntimeError: If Playwright is not available (see ``require``).
        """
        self.require()

        h = self.defaults.headless if headless is None else self._as_bool(headless)
        ua = self.defaults.user_agent if user_agent is None else str(user_agent)
        vw = self.defaults.viewport_width if viewport_width is None else int(viewport_width)
        vh = self.defaults.viewport_height if viewport_height is None else int(viewport_height)
        ihe = (
            self.defaults.ignore_https_errors
            if ignore_https_errors is None
            else self._as_bool(ignore_https_errors)
        )

        # Support Playwright-native headers/user-agent.
        # If user_agent is unset/empty or explicitly set to one of these tokens,
        # we omit the user_agent override so the browser's own UA is used.
        ua_value: Optional[str]
        ua_text = str(ua or "").strip()
        if not ua_text or ua_text.lower() in {"native", "playwright", "default"}:
            ua_value = None
        else:
            ua_value = ua_text

        pw = None
        browser = None
        context = None
        try:
            assert sync_playwright is not None  # guaranteed by require() above
            pw = sync_playwright().start()

            browser_name = self.defaults.browser
            browser_type = getattr(pw, browser_name, None)
            if browser_type is None:
                browser_name = "chromium"
                browser_type = pw.chromium

            launch_kwargs: Dict[str, Any] = {"headless": h}
            if browser_name == "chromium":
                # Other engines do not understand this Blink switch and may
                # refuse to launch when handed unknown switches.
                launch_kwargs["args"] = list(self._CHROMIUM_LAUNCH_ARGS)
            browser = browser_type.launch(**launch_kwargs)

            context_kwargs: Dict[str, Any] = {
                "viewport": {"width": vw, "height": vh},
                "ignore_https_errors": ihe,
            }
            if ua_value is not None:
                context_kwargs["user_agent"] = ua_value
            context = browser.new_context(**context_kwargs)

            page = context.new_page()
            yield page
        finally:
            # Best-effort teardown in reverse order of creation. Failures are
            # suppressed on purpose so they cannot mask an exception raised by
            # the caller's ``with`` body or stop later resources being released.
            with contextlib.suppress(Exception):
                if context is not None:
                    context.close()
            with contextlib.suppress(Exception):
                if browser is not None:
                    browser.close()
            with contextlib.suppress(Exception):
                if pw is not None:
                    pw.stop()

    def goto(self, page: Any, url: str) -> None:
        """Navigate *page* to *url* using the configured navigation timeout.

        Waits until the "domcontentloaded" event. Any exception raised by
        ``page.goto`` (for example a navigation timeout) propagates unchanged.
        """
        page.goto(
            url,
            timeout=int(self.defaults.navigation_timeout_ms),
            wait_until="domcontentloaded",
        )

    def debug_dump(self) -> None:
        """Log the effective settings via ``debug``; never raises."""
        try:
            debug(
                f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
                f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
                f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
            )
        except Exception:
            # Diagnostics must not break the caller.
            pass