2025-12-16 23:23:43 -08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2025-12-17 03:16:41 -08:00
|
|
|
import contextlib
|
2025-12-16 23:23:43 -08:00
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from typing import Any, Dict, Iterator, Optional
|
|
|
|
|
|
|
|
|
|
from SYS.logger import debug
|
|
|
|
|
|
|
|
|
|
# Playwright is treated as an optional dependency: the import is attempted once
# at module load and the outcome is recorded in HAS_PLAYWRIGHT so that this
# module stays importable when the package is missing or fails to import.
try:
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
    from playwright.sync_api import sync_playwright

    HAS_PLAYWRIGHT = True
    _PLAYWRIGHT_IMPORT_ERROR: Optional[Exception] = None
except Exception as exc: # pragma: no cover
    # Keep the original failure so PlaywrightTool.require() can include it in
    # the error message it raises.
    HAS_PLAYWRIGHT = False
    _PLAYWRIGHT_IMPORT_ERROR = exc
    # Fallbacks keep both names defined: the builtin TimeoutError stands in for
    # Playwright's exception class, and sync_playwright is None so callers can
    # test for availability.
    PlaywrightTimeoutError = TimeoutError # type: ignore
    sync_playwright = None # type: ignore


# Re-export for consumers (e.g. cmdlets catching navigation timeouts)
__all__ = ["HAS_PLAYWRIGHT", "PlaywrightTimeoutError", "PlaywrightTool", "PlaywrightDefaults"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
|
|
|
|
|
cur: Any = config
|
|
|
|
|
for key in path:
|
|
|
|
|
if not isinstance(cur, dict):
|
|
|
|
|
return None
|
|
|
|
|
cur = cur.get(key)
|
|
|
|
|
return cur
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(slots=True)
|
|
|
|
|
class PlaywrightDefaults:
|
|
|
|
|
browser: str = "chromium" # chromium|firefox|webkit
|
|
|
|
|
headless: bool = True
|
|
|
|
|
user_agent: str = (
|
|
|
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
|
|
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
|
|
|
"Chrome/120.0.0.0 Safari/537.36"
|
|
|
|
|
)
|
2025-12-21 05:10:09 -08:00
|
|
|
viewport_width: int = 1920
|
|
|
|
|
viewport_height: int = 1080
|
2025-12-16 23:23:43 -08:00
|
|
|
navigation_timeout_ms: int = 90_000
|
|
|
|
|
ignore_https_errors: bool = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PlaywrightTool:
    """Small wrapper to standardize Playwright defaults and lifecycle.

    This is meant to keep cmdlets/providers from duplicating:
    - sync_playwright start/stop
    - browser launch/context creation
    - user-agent/viewport defaults

    Config overrides are looked up in this order (first non-None value wins):
    1. ``config["tool"]["playwright"][<name>]``
    2. ``config["playwright"][<name>]``
    3. ``config["playwright_<name>"]``

    Recognized names:
    - playwright.browser="chromium"   (chromium|firefox|webkit, else chromium)
    - playwright.headless=true
    - playwright.user_agent="..."     (empty/"native"/"playwright"/"default"
      leaves the browser's own user agent in place)
    - playwright.viewport_width=1280
    - playwright.viewport_height=1200
    - playwright.navigation_timeout_ms=90000
    - playwright.ignore_https_errors=true

    Boolean options accept real booleans as well as the strings
    true/false, yes/no, on/off and 1/0 (case-insensitive).
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Copy ``config`` (shallowly) and resolve the effective defaults."""
        self._config: Dict[str, Any] = dict(config or {})
        self.defaults = self._load_defaults()

    @staticmethod
    def _coerce_bool(value: Any, fallback: bool) -> bool:
        """Interpret a config value as a boolean.

        Strings are parsed by meaning rather than by truthiness, so that
        ``"false"`` or ``"0"`` coming from a text config does not become
        ``True``. Unrecognized strings and ``None`` yield ``fallback``; any
        other value goes through ``bool()``.
        """
        if value is None:
            return fallback
        if isinstance(value, str):
            token = value.strip().lower()
            if token in ("1", "true", "yes", "on", "y", "t"):
                return True
            # The empty string stays False, matching plain bool("").
            if token in ("", "0", "false", "no", "off", "n", "f"):
                return False
            return fallback
        return bool(value)

    def _load_defaults(self) -> PlaywrightDefaults:
        """Build the effective settings from the config, falling back per field."""
        cfg = self._config
        defaults = PlaywrightDefaults()

        tool_block = _get_nested(cfg, "tool", "playwright")
        if not isinstance(tool_block, dict):
            tool_block = {}
        pw_block = cfg.get("playwright")
        if not isinstance(pw_block, dict):
            pw_block = {}

        def _get(name: str, fallback: Any) -> Any:
            # Most specific location first; a None value means "not set here".
            candidates = (
                tool_block.get(name),
                pw_block.get(name),
                cfg.get(f"playwright_{name}"),
            )
            for candidate in candidates:
                if candidate is not None:
                    return candidate
            return fallback

        def _int(name: str, fallback: int) -> int:
            raw = _get(name, fallback)
            try:
                return int(raw)
            except (TypeError, ValueError, OverflowError):
                # Unparseable value: keep the built-in default.
                return fallback

        browser = str(_get("browser", defaults.browser)).strip().lower() or "chromium"
        if browser not in {"chromium", "firefox", "webkit"}:
            browser = "chromium"

        headless = self._coerce_bool(_get("headless", defaults.headless), defaults.headless)
        ua = str(_get("user_agent", defaults.user_agent))
        ignore_https = self._coerce_bool(
            _get("ignore_https_errors", defaults.ignore_https_errors),
            defaults.ignore_https_errors,
        )

        return PlaywrightDefaults(
            browser=browser,
            headless=headless,
            user_agent=ua,
            viewport_width=_int("viewport_width", defaults.viewport_width),
            viewport_height=_int("viewport_height", defaults.viewport_height),
            navigation_timeout_ms=_int("navigation_timeout_ms", defaults.navigation_timeout_ms),
            ignore_https_errors=ignore_https,
        )

    def require(self) -> None:
        """Raise ``RuntimeError`` unless Playwright was imported successfully."""
        if HAS_PLAYWRIGHT and sync_playwright is not None:
            return
        detail = str(_PLAYWRIGHT_IMPORT_ERROR or "playwright is not installed")
        raise RuntimeError(
            "playwright is required; install with: pip install playwright; then: playwright install\n"
            f"detail: {detail}"
        )

    @contextlib.contextmanager
    def open_page(
        self,
        *,
        headless: Optional[bool] = None,
        user_agent: Optional[str] = None,
        viewport_width: Optional[int] = None,
        viewport_height: Optional[int] = None,
        ignore_https_errors: Optional[bool] = None,
    ) -> Iterator[Any]:
        """Context manager yielding a Playwright page with sane defaults.

        Every keyword argument overrides the corresponding value in
        ``self.defaults`` for this page only; ``None`` means "use the default".
        The context, browser and Playwright driver are shut down on exit,
        whether or not the body raised.

        Raises:
            RuntimeError: if Playwright is not available (see ``require``).
        """
        self.require()

        h = self.defaults.headless if headless is None else bool(headless)
        ua = self.defaults.user_agent if user_agent is None else str(user_agent)
        vw = self.defaults.viewport_width if viewport_width is None else int(viewport_width)
        vh = self.defaults.viewport_height if viewport_height is None else int(viewport_height)
        ihe = self.defaults.ignore_https_errors if ignore_https_errors is None else bool(ignore_https_errors)

        # Support Playwright-native headers/user-agent.
        # If user_agent is unset/empty or explicitly set to one of these tokens,
        # we omit the user_agent override so Playwright uses the browser's own UA.
        ua_value: Optional[str]
        ua_text = str(ua or "").strip()
        if not ua_text or ua_text.lower() in {"native", "playwright", "default"}:
            ua_value = None
        else:
            ua_value = ua_text

        pw = None
        browser = None
        context = None
        try:
            # require() already verified this; the assert only narrows the type.
            assert sync_playwright is not None
            pw = sync_playwright().start()

            browser_name = self.defaults.browser
            browser_type = getattr(pw, browser_name, None)
            if browser_type is None:
                browser_name = "chromium"
                browser_type = pw.chromium

            launch_kwargs: Dict[str, Any] = {"headless": h}
            if browser_name == "chromium":
                # Blink is Chromium's engine; this switch means nothing to
                # Firefox/WebKit, so only send it where it applies.
                launch_kwargs["args"] = ["--disable-blink-features=AutomationControlled"]
            browser = browser_type.launch(**launch_kwargs)

            context_kwargs: Dict[str, Any] = {
                "viewport": {"width": vw, "height": vh},
                "ignore_https_errors": ihe,
            }
            if ua_value is not None:
                context_kwargs["user_agent"] = ua_value

            context = browser.new_context(**context_kwargs)
            page = context.new_page()
            yield page
        finally:
            # Best-effort teardown, innermost resource first. A failure while
            # closing one resource must not prevent closing the others or mask
            # an exception raised by the caller's body.
            for resource, closer in ((context, "close"), (browser, "close"), (pw, "stop")):
                if resource is None:
                    continue
                with contextlib.suppress(Exception):
                    getattr(resource, closer)()

    def goto(self, page: Any, url: str) -> None:
        """Navigate with configured timeout.

        Calls ``page.goto`` with ``wait_until="domcontentloaded"`` and the
        configured ``navigation_timeout_ms``. Any exception raised by
        ``page.goto`` propagates unchanged to the caller.
        """
        page.goto(url, timeout=int(self.defaults.navigation_timeout_ms), wait_until="domcontentloaded")

    def debug_dump(self) -> None:
        """Log the effective settings via ``debug``; never raises."""
        with contextlib.suppress(Exception):
            debug(
                f"[playwright] browser={self.defaults.browser} headless={self.defaults.headless} "
                f"viewport={self.defaults.viewport_width}x{self.defaults.viewport_height} "
                f"nav_timeout_ms={self.defaults.navigation_timeout_ms}"
            )
|