jkj

2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions
--- a/cmdlet/screen_shot.py
+++ b/cmdlet/screen_shot.py
@@ -20,7 +20,16 @@ from urllib.parse import urlsplit, quote, urljoin
 from SYS.logger import log, debug
 from API.HTTP import HTTPClient
 from SYS.utils import ensure_directory, unique_path, unique_preserve_order
-from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
+from . import _shared as sh
+
+Cmdlet = sh.Cmdlet
+CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
+create_pipe_object_result = sh.create_pipe_object_result
+normalize_result_input = sh.normalize_result_input
+should_show_help = sh.should_show_help
+get_field = sh.get_field
+parse_cmdlet_args = sh.parse_cmdlet_args
 import pipeline as pipeline_context

 # ============================================================================
@@ -33,20 +42,7 @@ import pipeline as pipeline_context
 # Playwright & Screenshot Dependencies
 # ============================================================================

-try:
-    from playwright.sync_api import (
-        TimeoutError as PlaywrightTimeoutError,
-        sync_playwright,
-    )
-    HAS_PLAYWRIGHT = True
-except Exception:
-    HAS_PLAYWRIGHT = False
-    PlaywrightTimeoutError = TimeoutError  # type: ignore
-
-    def sync_playwright(*_args: Any, **_kwargs: Any) -> Any:  # type: ignore
-        raise RuntimeError(
-            "playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
-        )
+from tool.playwright import HAS_PLAYWRIGHT, PlaywrightTimeoutError, PlaywrightTool

 try:
    from config import resolve_output_dir
@@ -128,6 +124,7 @@ class ScreenshotOptions:
    prefer_platform_target: bool = False
    target_selectors: Optional[Sequence[str]] = None
    selector_timeout_ms: int = 10_000
+    playwright_tool: Optional[PlaywrightTool] = None


@dataclass(slots=True)
@@ -324,142 +321,119 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
 def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
    """Capture screenshot using Playwright."""
    debug(f"[_capture] Starting capture for {options.url} -> {destination}")
-    playwright = None
-    browser = None
-    context = None
    try:
-        debug("Starting Playwright...", flush=True)
-        playwright = sync_playwright().start()
-        log("Launching Chromium browser...", flush=True)
+        tool = options.playwright_tool or PlaywrightTool({})
+        tool.debug_dump()
+
+        log("Launching browser...", flush=True)
        format_name = _normalise_format(options.output_format)
        headless = options.headless or format_name == "pdf"
        debug(f"[_capture] Format: {format_name}, Headless: {headless}")
        
        if format_name == "pdf" and not options.headless:
            warnings.append("pdf output requires headless Chromium; overriding headless mode")
-        browser = playwright.chromium.launch(
-            headless=headless,
-            args=["--disable-blink-features=AutomationControlled"],
-        )
-        log("Creating browser context...", flush=True)
-        context = browser.new_context(
-            user_agent=USER_AGENT,
-            viewport=DEFAULT_VIEWPORT,
-            ignore_https_errors=True,
-        )
-        page = context.new_page()
-        log(f"Navigating to {options.url}...", flush=True)
-        try:
-            page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
-            log("Page loaded successfully", flush=True)
-        except PlaywrightTimeoutError:
-            warnings.append("navigation timeout; capturing current page state")
-            log("Navigation timeout; proceeding with current state", flush=True)
-        
-        # Skip article lookup by default (wait_for_article defaults to False)
-        if options.wait_for_article:
+
+        with tool.open_page(headless=headless) as page:
+            log(f"Navigating to {options.url}...", flush=True)
            try:
-                log("Waiting for article element...", flush=True)
-                page.wait_for_selector("article", timeout=10_000)
-                log("Article element found", flush=True)
+                tool.goto(page, options.url)
+                log("Page loaded successfully", flush=True)
            except PlaywrightTimeoutError:
-                warnings.append("<article> selector not found; capturing fallback")
-                log("Article element not found; using fallback", flush=True)
-        
-        if options.wait_after_load > 0:
-            log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
-            time.sleep(min(10.0, max(0.0, options.wait_after_load)))
-        if options.replace_video_posters:
-            log("Replacing video elements with posters...", flush=True)
-            page.evaluate(
-                """
-                    document.querySelectorAll('video').forEach(v => {
-                        if (v.poster) {
-                            const img = document.createElement('img');
-                            img.src = v.poster;
-                            img.style.maxWidth = '100%';
-                            img.style.borderRadius = '12px';
-                            v.replaceWith(img);
-                        }
-                    });
-                """
-            )
-        # Attempt platform-specific target capture if requested (and not PDF)
-        element_captured = False
-        if options.prefer_platform_target and format_name != "pdf":
-            log("Attempting platform-specific content capture...", flush=True)
-            try:
-                _platform_preprocess(options.url, page, warnings)
-            except Exception as e:
-                debug(f"[_capture] Platform preprocess failed: {e}")
-                pass
-            selectors = list(options.target_selectors or [])
-            if not selectors:
-                selectors = _selectors_for_url(options.url)
+                warnings.append("navigation timeout; capturing current page state")
+                log("Navigation timeout; proceeding with current state", flush=True)
            
-            debug(f"[_capture] Trying selectors: {selectors}")
-            for sel in selectors:
+            # Skip article lookup by default (wait_for_article defaults to False)
+            if options.wait_for_article:
                try:
-                    log(f"Trying selector: {sel}", flush=True)
-                    el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
+                    log("Waiting for article element...", flush=True)
+                    page.wait_for_selector("article", timeout=10_000)
+                    log("Article element found", flush=True)
                except PlaywrightTimeoutError:
-                    log(f"Selector not found: {sel}", flush=True)
-                    continue
+                    warnings.append("<article> selector not found; capturing fallback")
+                    log("Article element not found; using fallback", flush=True)
+            
+            if options.wait_after_load > 0:
+                log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
+                time.sleep(min(10.0, max(0.0, options.wait_after_load)))
+            if options.replace_video_posters:
+                log("Replacing video elements with posters...", flush=True)
+                page.evaluate(
+                    """
+                        document.querySelectorAll('video').forEach(v => {
+                            if (v.poster) {
+                                const img = document.createElement('img');
+                                img.src = v.poster;
+                                img.style.maxWidth = '100%';
+                                img.style.borderRadius = '12px';
+                                v.replaceWith(img);
+                            }
+                        });
+                    """
+                )
+            # Attempt platform-specific target capture if requested (and not PDF)
+            element_captured = False
+            if options.prefer_platform_target and format_name != "pdf":
+                log("Attempting platform-specific content capture...", flush=True)
                try:
-                    if el is not None:
-                        log(f"Found element with selector: {sel}", flush=True)
-                        try:
-                            el.scroll_into_view_if_needed(timeout=1000)
-                        except Exception:
-                            pass
-                        log(f"Capturing element to {destination}...", flush=True)
-                        el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
-                        element_captured = True
-                        log("Element captured successfully", flush=True)
-                        break
-                except Exception as exc:
-                    warnings.append(f"element capture failed for '{sel}': {exc}")
-                    log(f"Failed to capture element: {exc}", flush=True)
-        # Fallback to default capture paths
-        if element_captured:
-            pass
-        elif format_name == "pdf":
-            log("Generating PDF...", flush=True)
-            page.emulate_media(media="print")
-            page.pdf(path=str(destination), print_background=True)
-            log(f"PDF saved to {destination}", flush=True)
-        else:
-            log(f"Capturing full page to {destination}...", flush=True)
-            screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
-            if format_name == "jpeg":
-                screenshot_kwargs["type"] = "jpeg"
-                screenshot_kwargs["quality"] = 90
-            if options.full_page:
-                page.screenshot(full_page=True, **screenshot_kwargs)
+                    _platform_preprocess(options.url, page, warnings)
+                except Exception as e:
+                    debug(f"[_capture] Platform preprocess failed: {e}")
+                    pass
+                selectors = list(options.target_selectors or [])
+                if not selectors:
+                    selectors = _selectors_for_url(options.url)
+                
+                debug(f"[_capture] Trying selectors: {selectors}")
+                for sel in selectors:
+                    try:
+                        log(f"Trying selector: {sel}", flush=True)
+                        el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
+                    except PlaywrightTimeoutError:
+                        log(f"Selector not found: {sel}", flush=True)
+                        continue
+                    try:
+                        if el is not None:
+                            log(f"Found element with selector: {sel}", flush=True)
+                            try:
+                                el.scroll_into_view_if_needed(timeout=1000)
+                            except Exception:
+                                pass
+                            log(f"Capturing element to {destination}...", flush=True)
+                            el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
+                            element_captured = True
+                            log("Element captured successfully", flush=True)
+                            break
+                    except Exception as exc:
+                        warnings.append(f"element capture failed for '{sel}': {exc}")
+                        log(f"Failed to capture element: {exc}", flush=True)
+            # Fallback to default capture paths
+            if element_captured:
+                pass
+            elif format_name == "pdf":
+                log("Generating PDF...", flush=True)
+                page.emulate_media(media="print")
+                page.pdf(path=str(destination), print_background=True)
+                log(f"PDF saved to {destination}", flush=True)
            else:
-                article = page.query_selector("article")
-                if article is not None:
-                    article_kwargs = dict(screenshot_kwargs)
-                    article_kwargs.pop("full_page", None)
-                    article.screenshot(**article_kwargs)
+                log(f"Capturing full page to {destination}...", flush=True)
+                screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
+                if format_name == "jpeg":
+                    screenshot_kwargs["type"] = "jpeg"
+                    screenshot_kwargs["quality"] = 90
+                if options.full_page:
+                    page.screenshot(full_page=True, **screenshot_kwargs)
                else:
-                    page.screenshot(**screenshot_kwargs)
-            log(f"Screenshot saved to {destination}", flush=True)
+                    article = page.query_selector("article")
+                    if article is not None:
+                        article_kwargs = dict(screenshot_kwargs)
+                        article_kwargs.pop("full_page", None)
+                        article.screenshot(**article_kwargs)
+                    else:
+                        page.screenshot(**screenshot_kwargs)
+                log(f"Screenshot saved to {destination}", flush=True)
    except Exception as exc:
        debug(f"[_capture] Exception: {exc}")
        raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
-    finally:
-        log("Cleaning up browser resources...", flush=True)
-        with contextlib.suppress(Exception):
-            if context is not None:
-                context.close()
-        with contextlib.suppress(Exception):
-            if browser is not None:
-                browser.close()
-        with contextlib.suppress(Exception):
-            if playwright is not None:
-                playwright.stop()
-        log("Cleanup complete", flush=True)


 def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
@@ -511,8 +485,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    Screenshots are created using Playwright and marked as temporary
    so they can be cleaned up later with the cleanup cmdlet.
    """
-    from ._shared import parse_cmdlet_args
-    
    debug(f"[_run] screen-shot invoked with args: {args}")

    # Help check
@@ -534,6 +506,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    parsed = parse_cmdlet_args(args, CMDLET)
    
    format_value = parsed.get("format")
+    if not format_value:
+        # Default format can be set via config.conf tool block:
+        #   [tool=playwright]
+        #   format="pdf"
+        try:
+            tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
+            pw_cfg = tool_cfg.get("playwright") if isinstance(tool_cfg, dict) else None
+            if isinstance(pw_cfg, dict):
+                format_value = pw_cfg.get("format")
+        except Exception:
+            pass
+    if not format_value:
+        format_value = "png"
    storage_value = parsed.get("storage")
    selector_arg = parsed.get("selector")
    selectors = [selector_arg] if selector_arg else []
@@ -669,6 +654,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                prefer_platform_target=False,
                wait_for_article=False,
                full_page=True,
+                playwright_tool=PlaywrightTool(config),
            )
            
            screenshot_result = _capture_screenshot(options)