from __future__ import annotations import contextlib from dataclasses import dataclass from typing import Any, Dict, Iterator, Optional from SYS.logger import debug try: from playwright.sync_api import TimeoutError as PlaywrightTimeoutError from playwright.sync_api import sync_playwright HAS_PLAYWRIGHT = True _PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None except Exception as exc: # pragma: no cover HAS_PLAYWRIGHT = False _PLAYWRIGHT_IMPORT_ERROR = exc PlaywrightTimeoutError = TimeoutError # type: ignore sync_playwright = None # type: ignore # Re-export for consumers (e.g. cmdlets catching navigation timeouts) __all__ = [ "HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults" ] def _get_nested(config: Dict[str, Any], *path: str) -> Any: cur: Any = config for key in path: if not isinstance(cur, dict): return None cur = cur.get(key) return cur @dataclass(slots=True) class PlaywrightDefaults: browser: str = "chromium" # chromium|firefox|webkit headless: bool = True user_agent: str = ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/120.0.0.0 Safari/537.36" ) viewport_width: int = 1920 viewport_height: int = 1080 navigation_timeout_ms: int = 90_000 ignore_https_errors: bool = True class PlaywrightTool: """Small wrapper to standardize Playwright defaults and lifecycle. This is meant to keep cmdlets/providers from duplicating: - sync_playwright start/stop - browser launch/context creation - user-agent/viewport defaults Config overrides (top-level keys): - playwright.browser="chromium" - playwright.headless=true - playwright.user_agent="..." - playwright.viewport_width=1280 - playwright.viewport_height=1200 - playwright.navigation_timeout_ms=90000 - playwright.ignore_https_errors=true """ def __init__(self, config: Optional[Dict[str, Any]] = None) -> None: self._config: Dict[str, Any] = dict(config or {}) self.defaults = self._load_defaults() def _load_defaults(self) -> PlaywrightDefaults: cfg = self._config defaults = PlaywrightDefaults() tool_block = _get_nested(cfg, "tool", "playwright") if not isinstance(tool_block, dict): tool_block = {} pw_block = cfg.get("playwright") if isinstance(cfg.get("playwright"), dict) else {} if not isinstance(pw_block, dict): pw_block = {} def _get(name: str, fallback: Any) -> Any: val = tool_block.get(name) if val is None: val = pw_block.get(name) if val is None: val = cfg.get(f"playwright_{name}") if val is None: val = _get_nested(cfg, "playwright", name) return fallback if val is None else val browser = str(_get("browser", defaults.browser)).strip().lower() or "chromium" if browser not in {"chromium", "firefox", "webkit"}: browser = "chromium" headless_raw = _get("headless", defaults.headless) headless = bool(headless_raw) ua = str(_get("user_agent", defaults.user_agent)) def _int(name: str, fallback: int) -> int: raw = _get(name, fallback) try: return int(raw) except Exception: return fallback vw = _int("viewport_width", defaults.viewport_width) vh = _int("viewport_height", defaults.viewport_height) nav_timeout = _int("navigation_timeout_ms", defaults.navigation_timeout_ms) ignore_https = bool(_get("ignore_https_errors", defaults.ignore_https_errors)) return PlaywrightDefaults( browser=browser, headless=headless, user_agent=ua, viewport_width=vw, viewport_height=vh, navigation_timeout_ms=nav_timeout, ignore_https_errors=ignore_https, ) def require(self) -> None: if HAS_PLAYWRIGHT and sync_playwright is not None: return detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed") raise RuntimeError( "playwright is required; install with: pip install playwright; then: playwright install\n" f"detail: {detail}" ) @contextlib.contextmanager def open_page( self, *, headless: Optional[bool] = None, user_agent: Optional[str] = None, viewport_width: Optional[int] = None, viewport_height: Optional[int] = None, ignore_https_errors: Optional[bool] = None, ) -> Iterator[Any]: """Context manager yielding a Playwright page with sane defaults.""" self.require() h = self.defaults.headless if headless is None else bool(headless) ua = self.defaults.user_agent if user_agent is None else str(user_agent) vw = self.defaults.viewport_width if viewport_width is None else int( viewport_width ) vh = self.defaults.viewport_height if viewport_height is None else int( viewport_height ) ihe = ( self.defaults.ignore_https_errors if ignore_https_errors is None else bool(ignore_https_errors) ) # Support Playwright-native headers/user-agent. # If user_agent is unset/empty or explicitly set to one of these tokens, # we omit the user_agent override so Playwright uses its bundled Chromium UA. ua_value: Optional[str] ua_text = str(ua or "").strip() if not ua_text or ua_text.lower() in {"native", "playwright", "default"}: ua_value = None else: ua_value = ua_text pw = None browser = None context = None try: assert sync_playwright is not None pw = sync_playwright().start() browser_type = getattr(pw, self.defaults.browser, None) if browser_type is None: browser_type = pw.chromium browser = browser_type.launch( headless=h, args=["--disable-blink-features=AutomationControlled"], ) context_kwargs: Dict[str, Any] = { "viewport": { "width": vw, "height": vh }, "ignore_https_errors": ihe, } if ua_value is not None: context_kwargs["user_agent"] = ua_value context = browser.new_context(**context_kwargs) page = context.new_page() yield page finally: try: if context is not None: context.close() except Exception: pass try: if browser is not None: browser.close() except Exception: pass try: if pw is not None: pw.stop() except Exception: pass def goto(self, page: Any, url: str) -> None: """Navigate with configured timeout.""" try: page.goto( url, timeout=int(self.defaults.navigation_timeout_ms), wait_until="domcontentloaded" ) except Exception: raise def debug_dump(self) -> None: try: debug( f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} " f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} " f"nav_timeout_ms={self.defaults.navigation_timeout_ms}" ) except Exception: pass