diff --git a/cmdlet/screen_shot.py b/cmdlet/screen_shot.py index 42edd49..6c96d73 100644 --- a/cmdlet/screen_shot.py +++ b/cmdlet/screen_shot.py @@ -135,6 +135,7 @@ class ScreenshotOptions: selector_timeout_ms: int = 10_000 interactive_pick: bool = False interactive_pick_timeout_s: float = 120.0 + quality: int = 8 playwright_tool: Optional[PlaywrightTool] = None @@ -280,6 +281,36 @@ def _normalize_capture_mode(value: Optional[str]) -> str: return "" +def _normalize_quality(value: Any) -> int: + try: + quality = int(str(value).strip()) + except Exception: + quality = 8 + return max(1, min(10, quality)) + + +def _jpeg_quality_from_level(level: int) -> int: + normalized = _normalize_quality(level) + if normalized >= 10: + return 100 + return 45 + ((normalized - 1) * 6) + + +def _webp_quality_settings(level: int) -> Dict[str, Any]: + normalized = _normalize_quality(level) + if normalized >= 10: + return { + "quality": 100, + "method": 6, + "lossless": True, + } + return { + "quality": 45 + ((normalized - 1) * 6), + "method": 6, + "lossless": False, + } + + def _stdin_interactive() -> bool: try: return bool(sys.stdin and sys.stdin.isatty()) @@ -589,6 +620,7 @@ def _capture_selector_screenshot( destination: Path, format_name: str, selector_timeout_ms: int, + quality_level: int, ) -> None: selector_text = str(selector or "").strip() if not selector_text: @@ -697,6 +729,89 @@ def _capture_selector_screenshot( except Exception: return None + def _read_viewport_rect() -> Optional[Dict[str, float]]: + try: + rect_value = locator.evaluate( + """ + (element) => { + const rect = element.getBoundingClientRect(); + return { + left: rect.left, + top: rect.top, + right: rect.right, + bottom: rect.bottom, + width: rect.width, + height: rect.height, + }; + } + """ + ) + except Exception: + rect_value = None + if not isinstance(rect_value, dict): + return None + try: + return { + "left": float(rect_value.get("left") or 0.0), + "top": float(rect_value.get("top") or 0.0), + "right": float(rect_value.get("right") or 0.0), + "bottom": float(rect_value.get("bottom") or 0.0), + "width": max(1.0, float(rect_value.get("width") or 0.0)), + "height": max(1.0, float(rect_value.get("height") or 0.0)), + } + except Exception: + return None + + def _read_scroll_metrics() -> Dict[str, float]: + try: + metrics_value = page.evaluate( + """ + () => { + const root = document.documentElement || document.body; + const body = document.body; + const scrollHeight = Math.max( + root ? root.scrollHeight || 0 : 0, + body ? body.scrollHeight || 0 : 0, + ); + const innerWidth = window.innerWidth || 0; + const innerHeight = window.innerHeight || 0; + return { + scrollX: window.scrollX || window.pageXOffset || 0, + scrollY: window.scrollY || window.pageYOffset || 0, + innerWidth, + innerHeight, + maxScrollY: Math.max(0, scrollHeight - innerHeight), + }; + } + """ + ) + except Exception: + metrics_value = None + if not isinstance(metrics_value, dict): + return { + "scrollX": 0.0, + "scrollY": 0.0, + "innerWidth": max(1.0, current_viewport_width), + "innerHeight": max(1.0, current_viewport_height), + "maxScrollY": 0.0, + } + try: + return { + "scrollX": max(0.0, float(metrics_value.get("scrollX") or 0.0)), + "scrollY": max(0.0, float(metrics_value.get("scrollY") or 0.0)), + "innerWidth": max(1.0, float(metrics_value.get("innerWidth") or current_viewport_width or 1.0)), + "innerHeight": max(1.0, float(metrics_value.get("innerHeight") or current_viewport_height or 1.0)), + "maxScrollY": max(0.0, float(metrics_value.get("maxScrollY") or 0.0)), + } + except Exception: + return { + "scrollX": 0.0, + "scrollY": 0.0, + "innerWidth": max(1.0, current_viewport_width), + "innerHeight": max(1.0, current_viewport_height), + "maxScrollY": 0.0, + } + stable_clip: Optional[Dict[str, float]] = None stable_reads = 0 previous_clip: Optional[Dict[str, float]] = None @@ -780,45 +895,118 @@ def _capture_selector_screenshot( f"Pillow is required for tall element capture: {exc}" ) from exc - try: - full_page_bytes = page.screenshot( - full_page=True, - timeout=timeout_ms, - type="png", - ) - except Exception as exc: - raise ScreenshotError( - f"Could not capture full-page screenshot for selector '{selector_text}': {exc}" - ) from exc - padding = 2.0 - crop_left = max(0, int(page_rect["x"] - padding)) - crop_top = max(0, int(page_rect["y"] - padding)) - crop_right = max(crop_left + 1, int(page_rect["x"] + page_rect["width"] + padding + 0.9999)) - crop_bottom = max(crop_top + 1, int(page_rect["y"] + page_rect["height"] + padding + 0.9999)) + output_left = max(0.0, page_rect["x"] - padding) + output_top = max(0.0, page_rect["y"] - padding) + output_width = max(1, int(page_rect["width"] + (padding * 2.0) + 0.9999)) + output_height = max(1, int(page_rect["height"] + (padding * 2.0) + 0.9999)) + canvas_mode = "RGB" if format_name == "jpeg" else "RGBA" + canvas_bg = (255, 255, 255) if canvas_mode == "RGB" else (255, 255, 255, 0) + stitched = Image.new(canvas_mode, (output_width, output_height), canvas_bg) + stitched_bottom = 0 + overlap_px = 24 + step_cursor = 0 + max_iterations = max(10, int((output_height / max(1.0, current_viewport_height)) * 6.0) + 12) try: - with Image.open(io.BytesIO(full_page_bytes)) as full_page_image: - bounded_box = ( - max(0, min(crop_left, full_page_image.width - 1)), - max(0, min(crop_top, full_page_image.height - 1)), - max(1, min(crop_right, full_page_image.width)), - max(1, min(crop_bottom, full_page_image.height)), + for _ in range(max_iterations): + metrics = _read_scroll_metrics() + desired_scroll_y = min( + metrics["maxScrollY"], + max(0.0, output_top + float(step_cursor)), ) - cropped = full_page_image.crop(bounded_box) - save_kwargs: Dict[str, Any] = {} - if format_name == "jpeg": - cropped = cropped.convert("RGB") - save_kwargs.update({"format": "JPEG", "quality": 90}) - else: - if cropped.mode == "P": - cropped = cropped.convert("RGBA") - save_kwargs.update({"format": "PNG"}) - cropped.save(destination, **save_kwargs) + page.evaluate("(y) => window.scrollTo(0, y)", desired_scroll_y) + page.wait_for_timeout(125) + try: + locator.evaluate( + """ + async () => { + await new Promise((resolve) => requestAnimationFrame(() => requestAnimationFrame(resolve))); + } + """ + ) + except Exception: + pass + + metrics = _read_scroll_metrics() + viewport_rect = _read_viewport_rect() + if viewport_rect is None: + continue + + visible_left = max(0.0, viewport_rect["left"] - padding) + visible_top = max(0.0, viewport_rect["top"] - padding) + visible_right = min(metrics["innerWidth"], viewport_rect["right"] + padding) + visible_bottom = min(metrics["innerHeight"], viewport_rect["bottom"] + padding) + if visible_right <= visible_left or visible_bottom <= visible_top: + if metrics["scrollY"] >= metrics["maxScrollY"]: + break + step_cursor += max(1, int(metrics["innerHeight"] * 0.6)) + continue + + clip_box = { + "x": float(int(visible_left)), + "y": float(int(visible_top)), + "width": float(int((visible_right - visible_left) + 0.9999)), + "height": float(int((visible_bottom - visible_top) + 0.9999)), + } + piece_bytes = page.screenshot( + timeout=timeout_ms, + type="png", + clip=clip_box, + ) + + capture_page_x = metrics["scrollX"] + visible_left + capture_page_y = metrics["scrollY"] + visible_top + paste_x = int(round(capture_page_x - output_left)) + paste_y = int(round(capture_page_y - output_top)) + + with Image.open(io.BytesIO(piece_bytes)) as piece_image: + if canvas_mode == "RGB": + piece = piece_image.convert("RGB") + else: + piece = piece_image.convert("RGBA") + + crop_left = max(0, -paste_x) + crop_top = max(0, -paste_y) + crop_right = min(piece.width, output_width - paste_x) + crop_bottom = min(piece.height, output_height - paste_y) + if crop_right <= crop_left or crop_bottom <= crop_top: + continue + if crop_left or crop_top or crop_right != piece.width or crop_bottom != piece.height: + piece = piece.crop((crop_left, crop_top, crop_right, crop_bottom)) + dest_x = max(0, paste_x + crop_left) + dest_y = max(0, paste_y + crop_top) + stitched.paste(piece, (dest_x, dest_y)) + piece_bottom = dest_y + piece.height + + if piece_bottom <= stitched_bottom + 1: + if metrics["scrollY"] >= metrics["maxScrollY"]: + break + step_cursor += max(1, int(metrics["innerHeight"] * 0.6)) + continue + + stitched_bottom = max(stitched_bottom, piece_bottom) + if stitched_bottom >= output_height: + break + step_cursor = max(0, stitched_bottom - overlap_px) + + if stitched_bottom <= 0: + raise ScreenshotError( + f"Could not capture stitched slices for selector '{selector_text}'" + ) + + save_kwargs: Dict[str, Any] = {} + if format_name == "jpeg": + save_kwargs.update({"format": "JPEG", "quality": _jpeg_quality_from_level(quality_level)}) + else: + save_kwargs.update({"format": "PNG"}) + stitched.save(destination, **save_kwargs) return + except ScreenshotError: + raise except Exception as exc: raise ScreenshotError( - f"Could not crop full-page screenshot for selector '{selector_text}': {exc}" + f"Could not stitch tall selector capture for '{selector_text}': {exc}" ) from exc padding = 2.0 @@ -841,7 +1029,7 @@ def _capture_selector_screenshot( } if format_name == "jpeg": screenshot_kwargs["type"] = "jpeg" - screenshot_kwargs["quality"] = 90 + screenshot_kwargs["quality"] = _jpeg_quality_from_level(quality_level) page.screenshot(**screenshot_kwargs) @@ -852,6 +1040,7 @@ def _convert_to_webp( *, quality: int = 90, method: int = 6, + lossless: bool = False, max_dim: int = WEBP_MAX_DIM, downscale_if_oversize: bool = True, ) -> bool: @@ -883,6 +1072,7 @@ def _convert_to_webp( "format": "WEBP", "quality": int(quality), "method": int(method), + "lossless": bool(lossless), } # Preserve alpha when present; Pillow handles it for WEBP. @@ -1188,6 +1378,7 @@ def _capture( [ ("url", options.url), ("format", _normalize_format(options.output_format)), + ("quality", options.quality), ("browser", getattr(defaults, "browser", "unknown") if defaults else "unknown"), ("headless", getattr(defaults, "headless", "unknown") if defaults else "unknown"), ( @@ -1219,7 +1410,11 @@ def _capture( element_captured = False if options.interactive_pick and format_name != "pdf": selected_selector = "" - with tool.open_page(headless=picker_headless) as page: + with tool.open_page( + headless=picker_headless, + emulate_viewport=picker_headless, + start_maximized=not picker_headless, + ) as page: navigation_status = _prepare_capture_page( tool, page, @@ -1255,6 +1450,7 @@ def _capture( destination, format_name, options.selector_timeout_ms, + options.quality, ) element_captured = True else: @@ -1285,6 +1481,7 @@ def _capture( destination, format_name, options.selector_timeout_ms, + options.quality, ) element_captured = True capture_mode = "selector" @@ -1310,7 +1507,7 @@ def _capture( } if format_name == "jpeg": screenshot_kwargs["type"] = "jpeg" - screenshot_kwargs["quality"] = 90 + screenshot_kwargs["quality"] = _jpeg_quality_from_level(options.quality) if options.full_page: progress.step("capturing output") page.screenshot(full_page=True, **screenshot_kwargs) @@ -1404,10 +1601,22 @@ def _capture_screenshot( if requested_format == "webp": progress.step("capturing converting to webp") try: - did_downscale = _convert_to_webp(capture_path, destination) + webp_settings = _webp_quality_settings(options.quality) + did_downscale = _convert_to_webp( + capture_path, + destination, + quality=int(webp_settings["quality"]), + method=int(webp_settings["method"]), + lossless=bool(webp_settings["lossless"]), + ) if did_downscale: + try: + destination.unlink(missing_ok=True) + except Exception: + pass + destination = capture_path warnings.append( - f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}" + f"webp conversion required downscaling to fit {WEBP_MAX_DIM}px limit; using original png instead: {capture_path.name}" ) else: try: @@ -1475,6 +1684,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: format_value = parsed.get("format") capture_mode_value = _normalize_capture_mode(parsed.get("capture_mode")) + quality_value = _normalize_quality(parsed.get("quality")) if not format_value: try: tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {} @@ -1549,6 +1759,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ("urls", [u for u, _ in url_to_process]), ("archive", archive_enabled), ("format", format_name), + ("quality", quality_value), ("capture_mode", capture_mode_value or ("interactive" if interactive_default and format_name != "pdf" else "auto")), ("output_dir", screenshot_dir), ("output_dir_source", screenshot_dir_source), @@ -1622,6 +1833,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: wait_for_article=False, full_page=True, interactive_pick=False, + quality=quality_value, playwright_tool=shared_playwright_tool, ) @@ -1731,7 +1943,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: CMDLET = Cmdlet( name="screen-shot", summary="Capture a website screenshot", - usage="screen-shot [options] [-query \"format:full\"]", + usage="screen-shot [options] [-query \"format:full quality:10\"]", alias=["screenshot", "ss"], arg=[ @@ -1748,6 +1960,12 @@ CMDLET = Cmdlet( query_only=True, description="Capture mode via -query, e.g. format:full or format:interactive" ), + sh.QueryArg( + "quality", + key="quality", + query_only=True, + description="Screenshot quality via -query, 1-10. 10 uses highest quality and lossless webp." + ), CmdletArg( name="selector", type="string", @@ -1762,6 +1980,7 @@ CMDLET = Cmdlet( "Screenshots are temporary artifacts stored in the configured `temp` directory.", "Interactive single-URL runs open a headful browser picker by default so you can hover and click the element to capture.", "Use -query \"format:full\" to bypass the picker and capture the full page directly.", + "Use -query \"quality:1\" through \"quality:10\" to control jpeg/webp compression. quality:10 uses lossless webp.", ], ) diff --git a/tool/playwright.py b/tool/playwright.py index 467aa1d..eb84a30 100644 --- a/tool/playwright.py +++ b/tool/playwright.py @@ -274,6 +274,8 @@ class PlaywrightTool: viewport_height: Optional[int] = None, ignore_https_errors: Optional[bool] = None, accept_downloads: bool = False, + emulate_viewport: bool = True, + start_maximized: bool = False, ) -> Iterator[Any]: """Context manager yielding a Playwright page with sane defaults.""" self.require() @@ -314,19 +316,26 @@ class PlaywrightTool: if browser_type is None: browser_type = pw.chromium + launch_args = ["--disable-blink-features=AutomationControlled"] + if bool(start_maximized) and not h: + launch_args.append("--start-maximized") + browser = browser_type.launch( headless=h, - args=["--disable-blink-features=AutomationControlled"], + args=launch_args, ) context_kwargs: Dict[str, Any] = { - "viewport": { - "width": vw, - "height": vh - }, "ignore_https_errors": ihe, "accept_downloads": bool(accept_downloads), } + if bool(emulate_viewport): + context_kwargs["viewport"] = { + "width": vw, + "height": vh, + } + else: + context_kwargs["no_viewport"] = True if ua_value is not None: context_kwargs["user_agent"] = ua_value