hkjh

2025-12-17 17:42:46 -08:00
parent d2e7385280
commit 76691dbbf5
9 changed files with 762 additions and 119 deletions
--- a/cmdlet/screen_shot.py
+++ b/cmdlet/screen_shot.py
@@ -323,6 +323,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
    debug(f"[_capture] Starting capture for {options.url} -> {destination}")
    try:
        tool = options.playwright_tool or PlaywrightTool({})
+
+        # Force Chromium for the screen-shot cmdlet: if the tool's configured browser is anything else (or cannot be read), replace the tool with one built from a browser="chromium" config
+        try:
+            current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
+            if current_browser != "chromium":
+                debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
+                tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
+        except Exception:
+            tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
+
        tool.debug_dump()

        log("Launching browser...", flush=True)
@@ -333,104 +343,114 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
        if format_name == "pdf" and not options.headless:
            warnings.append("pdf output requires headless Chromium; overriding headless mode")

-        with tool.open_page(headless=headless) as page:
-            log(f"Navigating to {options.url}...", flush=True)
-            try:
-                tool.goto(page, options.url)
-                log("Page loaded successfully", flush=True)
-            except PlaywrightTimeoutError:
-                warnings.append("navigation timeout; capturing current page state")
-                log("Navigation timeout; proceeding with current state", flush=True)
-            
-            # Skip article lookup by default (wait_for_article defaults to False)
-            if options.wait_for_article:
+        try:
+            with tool.open_page(headless=headless) as page:
+                log(f"Navigating to {options.url}...", flush=True)
                try:
-                    log("Waiting for article element...", flush=True)
-                    page.wait_for_selector("article", timeout=10_000)
-                    log("Article element found", flush=True)
+                    tool.goto(page, options.url)
+                    log("Page loaded successfully", flush=True)
                except PlaywrightTimeoutError:
-                    warnings.append("<article> selector not found; capturing fallback")
-                    log("Article element not found; using fallback", flush=True)
-            
-            if options.wait_after_load > 0:
-                log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
-                time.sleep(min(10.0, max(0.0, options.wait_after_load)))
-            if options.replace_video_posters:
-                log("Replacing video elements with posters...", flush=True)
-                page.evaluate(
-                    """
-                        document.querySelectorAll('video').forEach(v => {
-                            if (v.poster) {
-                                const img = document.createElement('img');
-                                img.src = v.poster;
-                                img.style.maxWidth = '100%';
-                                img.style.borderRadius = '12px';
-                                v.replaceWith(img);
-                            }
-                        });
-                    """
-                )
-            # Attempt platform-specific target capture if requested (and not PDF)
-            element_captured = False
-            if options.prefer_platform_target and format_name != "pdf":
-                log("Attempting platform-specific content capture...", flush=True)
-                try:
-                    _platform_preprocess(options.url, page, warnings)
-                except Exception as e:
-                    debug(f"[_capture] Platform preprocess failed: {e}")
-                    pass
-                selectors = list(options.target_selectors or [])
-                if not selectors:
-                    selectors = _selectors_for_url(options.url)
+                    warnings.append("navigation timeout; capturing current page state")
+                    log("Navigation timeout; proceeding with current state", flush=True)
                
-                debug(f"[_capture] Trying selectors: {selectors}")
-                for sel in selectors:
+                # Skip article lookup by default (wait_for_article defaults to False)
+                if options.wait_for_article:
                    try:
-                        log(f"Trying selector: {sel}", flush=True)
-                        el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
+                        log("Waiting for article element...", flush=True)
+                        page.wait_for_selector("article", timeout=10_000)
+                        log("Article element found", flush=True)
                    except PlaywrightTimeoutError:
-                        log(f"Selector not found: {sel}", flush=True)
-                        continue
+                        warnings.append("<article> selector not found; capturing fallback")
+                        log("Article element not found; using fallback", flush=True)
+                
+                if options.wait_after_load > 0:
+                    log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
+                    time.sleep(min(10.0, max(0.0, options.wait_after_load)))
+                if options.replace_video_posters:
+                    log("Replacing video elements with posters...", flush=True)
+                    page.evaluate(
+                        """
+                            document.querySelectorAll('video').forEach(v => {
+                                if (v.poster) {
+                                    const img = document.createElement('img');
+                                    img.src = v.poster;
+                                    img.style.maxWidth = '100%';
+                                    img.style.borderRadius = '12px';
+                                    v.replaceWith(img);
+                                }
+                            });
+                        """
+                    )
+                # Attempt platform-specific target capture if requested (and not PDF)
+                element_captured = False
+                if options.prefer_platform_target and format_name != "pdf":
+                    log("Attempting platform-specific content capture...", flush=True)
                    try:
-                        if el is not None:
-                            log(f"Found element with selector: {sel}", flush=True)
-                            try:
-                                el.scroll_into_view_if_needed(timeout=1000)
-                            except Exception:
-                                pass
-                            log(f"Capturing element to {destination}...", flush=True)
-                            el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
-                            element_captured = True
-                            log("Element captured successfully", flush=True)
-                            break
-                    except Exception as exc:
-                        warnings.append(f"element capture failed for '{sel}': {exc}")
-                        log(f"Failed to capture element: {exc}", flush=True)
-            # Fallback to default capture paths
-            if element_captured:
-                pass
-            elif format_name == "pdf":
-                log("Generating PDF...", flush=True)
-                page.emulate_media(media="print")
-                page.pdf(path=str(destination), print_background=True)
-                log(f"PDF saved to {destination}", flush=True)
-            else:
-                log(f"Capturing full page to {destination}...", flush=True)
-                screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
-                if format_name == "jpeg":
-                    screenshot_kwargs["type"] = "jpeg"
-                    screenshot_kwargs["quality"] = 90
-                if options.full_page:
-                    page.screenshot(full_page=True, **screenshot_kwargs)
+                        _platform_preprocess(options.url, page, warnings)
+                    except Exception as e:
+                        debug(f"[_capture] Platform preprocess failed: {e}")
+                        pass
+                    selectors = list(options.target_selectors or [])
+                    if not selectors:
+                        selectors = _selectors_for_url(options.url)
+                    
+                    debug(f"[_capture] Trying selectors: {selectors}")
+                    for sel in selectors:
+                        try:
+                            log(f"Trying selector: {sel}", flush=True)
+                            el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
+                        except PlaywrightTimeoutError:
+                            log(f"Selector not found: {sel}", flush=True)
+                            continue
+                        try:
+                            if el is not None:
+                                log(f"Found element with selector: {sel}", flush=True)
+                                try:
+                                    el.scroll_into_view_if_needed(timeout=1000)
+                                except Exception:
+                                    pass
+                                log(f"Capturing element to {destination}...", flush=True)
+                                el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
+                                element_captured = True
+                                log("Element captured successfully", flush=True)
+                                break
+                        except Exception as exc:
+                            warnings.append(f"element capture failed for '{sel}': {exc}")
+                            log(f"Failed to capture element: {exc}", flush=True)
+                # Fallback to default capture paths
+                if element_captured:
+                    pass
+                elif format_name == "pdf":
+                    log("Generating PDF...", flush=True)
+                    page.emulate_media(media="print")
+                    page.pdf(path=str(destination), print_background=True)
+                    log(f"PDF saved to {destination}", flush=True)
                else:
-                    article = page.query_selector("article")
-                    if article is not None:
-                        article_kwargs = dict(screenshot_kwargs)
-                        article_kwargs.pop("full_page", None)
-                        article.screenshot(**article_kwargs)
+                    log(f"Capturing full page to {destination}...", flush=True)
+                    screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
+                    if format_name == "jpeg":
+                        screenshot_kwargs["type"] = "jpeg"
+                        screenshot_kwargs["quality"] = 90
+                    if options.full_page:
+                        page.screenshot(full_page=True, **screenshot_kwargs)
                    else:
-                        page.screenshot(**screenshot_kwargs)
-                log(f"Screenshot saved to {destination}", flush=True)
+                        article = page.query_selector("article")
+                        if article is not None:
+                            article_kwargs = dict(screenshot_kwargs)
+                            article_kwargs.pop("full_page", None)
+                            article.screenshot(**article_kwargs)
+                        else:
+                            page.screenshot(**screenshot_kwargs)
+                    log(f"Screenshot saved to {destination}", flush=True)
+        except Exception as exc:
+            debug(f"[_capture] Exception launching browser/page: {exc}")
+            msg = str(exc).lower()
+            if any(k in msg for k in ["executable", "not found", "no such file", "cannot find", "install"]):
+                raise ScreenshotError("Chromium Playwright browser binaries not found. Install them: python ./scripts/setup.py --playwright-only --browsers chromium") from exc
+            raise
+    except ScreenshotError:
+        # Already a ScreenshotError (e.g. the missing-browser error raised above): re-raise it unchanged so the generic handler below does not wrap it a second time
+        raise
    except Exception as exc:
        debug(f"[_capture] Exception: {exc}")
        raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
@@ -645,6 +665,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        
        try:
            # Create screenshot with provided options
+            # Force the Playwright engine to Chromium for the screen-shot cmdlet
+            # (this ensures consistent rendering and supports PDF output requirements).
+            pw_local_cfg = {}
+            if isinstance(config, dict):
+                tool_block = dict(config.get("tool") or {})
+                pw_block = dict(tool_block.get("playwright") or {})
+                pw_block["browser"] = "chromium"
+                tool_block["playwright"] = pw_block
+                pw_local_cfg = dict(config)
+                pw_local_cfg["tool"] = tool_block
+            else:
+                pw_local_cfg = {"tool": {"playwright": {"browser": "chromium"}}}
+
            options = ScreenshotOptions(
                url=url,
                output_dir=screenshot_dir,
@@ -654,7 +687,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                prefer_platform_target=False,
                wait_for_article=False,
                full_page=True,
-                playwright_tool=PlaywrightTool(config),
+                playwright_tool=PlaywrightTool(pw_local_cfg),
            )
            
            screenshot_result = _capture_screenshot(options)
@@ -744,12 +777,11 @@ CMDLET = Cmdlet(
        CmdletArg(name="selector", type="string", description="CSS selector for element capture"),

    ],
-    detail=
-    ["""
-        
-
-        
-    """]
+    detail=[
+        "Uses Playwright Chromium engine only. Install Chromium with: python ./scripts/setup.py --playwright-only --browsers chromium",
+        "PDF output requires headless Chromium (the cmdlet will enforce headless mode for PDF).",
+        "Screenshots are temporary artifacts stored in the configured `temp` directory.",
+    ]
 )

 CMDLET.exec = _run