fixed screenshot capture and conversion

This commit is contained in:
2026-04-21 11:35:37 -07:00
parent 10e3cd009b
commit bc95a5c45d
2 changed files with 271 additions and 43 deletions
+257 -38
View File
@@ -135,6 +135,7 @@ class ScreenshotOptions:
selector_timeout_ms: int = 10_000
interactive_pick: bool = False
interactive_pick_timeout_s: float = 120.0
quality: int = 8
playwright_tool: Optional[PlaywrightTool] = None
@@ -280,6 +281,36 @@ def _normalize_capture_mode(value: Optional[str]) -> str:
return ""
def _normalize_quality(value: Any) -> int:
try:
quality = int(str(value).strip())
except Exception:
quality = 8
return max(1, min(10, quality))
def _jpeg_quality_from_level(level: int) -> int:
    """Translate a 1-10 quality level into a 0-100 JPEG quality value.

    The level is first passed through ``_normalize_quality``. Levels 1-9 map
    linearly from 45 to 93 in steps of 6; level 10 maps to 100.
    """
    clamped = _normalize_quality(level)
    return 100 if clamped >= 10 else 45 + 6 * (clamped - 1)
def _webp_quality_settings(level: int) -> Dict[str, Any]:
    """Build WebP encoder settings for a 1-10 quality level.

    The level is first passed through ``_normalize_quality``. Level 10 selects
    lossless encoding with quality 100; lower levels select lossy encoding
    with quality running from 45 (level 1) to 93 (level 9) in steps of 6.
    ``method`` is always 6.

    Returns:
        A dict with the keys ``quality``, ``method`` and ``lossless``.
    """
    clamped = _normalize_quality(level)
    use_lossless = clamped >= 10
    encoder_quality = 100 if use_lossless else 45 + 6 * (clamped - 1)
    return {
        "quality": encoder_quality,
        "method": 6,
        "lossless": use_lossless,
    }
def _stdin_interactive() -> bool:
try:
return bool(sys.stdin and sys.stdin.isatty())
@@ -589,6 +620,7 @@ def _capture_selector_screenshot(
destination: Path,
format_name: str,
selector_timeout_ms: int,
quality_level: int,
) -> None:
selector_text = str(selector or "").strip()
if not selector_text:
@@ -697,6 +729,89 @@ def _capture_selector_screenshot(
except Exception:
return None
def _read_viewport_rect() -> Optional[Dict[str, float]]:
    """Return the target element's bounding client rect as floats.

    Evaluates ``getBoundingClientRect()`` on the enclosing ``locator``.
    Missing or falsy edge values become 0.0; ``width`` and ``height`` are
    floored at 1.0.

    Returns:
        A dict with ``left``, ``top``, ``right``, ``bottom``, ``width`` and
        ``height``, or ``None`` when the evaluation fails, does not return a
        dict, or yields a value that cannot be converted to float.
    """
    try:
        raw_rect = locator.evaluate(
            """
(element) => {
const rect = element.getBoundingClientRect();
return {
left: rect.left,
top: rect.top,
right: rect.right,
bottom: rect.bottom,
width: rect.width,
height: rect.height,
};
}
"""
        )
    except Exception:
        return None

    if not isinstance(raw_rect, dict):
        return None

    try:
        rect: Dict[str, float] = {}
        for edge in ("left", "top", "right", "bottom"):
            rect[edge] = float(raw_rect.get(edge) or 0.0)
        for extent in ("width", "height"):
            # Keep a non-zero extent even for collapsed elements.
            rect[extent] = max(1.0, float(raw_rect.get(extent) or 0.0))
    except Exception:
        return None
    return rect
def _read_scroll_metrics() -> Dict[str, float]:
    """Return the page's current scroll offsets and viewport size as floats.

    Evaluates a script on the enclosing ``page`` that reports ``scrollX``,
    ``scrollY``, ``innerWidth``, ``innerHeight`` and ``maxScrollY`` (document
    scroll height minus the inner height, floored at 0).

    Returns:
        A dict with those five keys. Offsets are floored at 0.0 and the inner
        dimensions at 1.0. When the evaluation fails, returns a non-dict, or
        yields unconvertible values, a fallback is returned with zero offsets
        and the enclosing ``current_viewport_width`` /
        ``current_viewport_height`` as the inner dimensions.
    """

    def _viewport_fallback() -> Dict[str, float]:
        # Built lazily so the viewport values are only read on failure paths.
        return {
            "scrollX": 0.0,
            "scrollY": 0.0,
            "innerWidth": max(1.0, current_viewport_width),
            "innerHeight": max(1.0, current_viewport_height),
            "maxScrollY": 0.0,
        }

    try:
        raw_metrics = page.evaluate(
            """
() => {
const root = document.documentElement || document.body;
const body = document.body;
const scrollHeight = Math.max(
root ? root.scrollHeight || 0 : 0,
body ? body.scrollHeight || 0 : 0,
);
const innerWidth = window.innerWidth || 0;
const innerHeight = window.innerHeight || 0;
return {
scrollX: window.scrollX || window.pageXOffset || 0,
scrollY: window.scrollY || window.pageYOffset || 0,
innerWidth,
innerHeight,
maxScrollY: Math.max(0, scrollHeight - innerHeight),
};
}
"""
        )
    except Exception:
        return _viewport_fallback()

    if not isinstance(raw_metrics, dict):
        return _viewport_fallback()

    try:
        scroll_x = float(raw_metrics.get("scrollX") or 0.0)
        scroll_y = float(raw_metrics.get("scrollY") or 0.0)
        inner_width = float(
            raw_metrics.get("innerWidth") or current_viewport_width or 1.0
        )
        inner_height = float(
            raw_metrics.get("innerHeight") or current_viewport_height or 1.0
        )
        max_scroll_y = float(raw_metrics.get("maxScrollY") or 0.0)
        normalized = {
            "scrollX": max(0.0, scroll_x),
            "scrollY": max(0.0, scroll_y),
            "innerWidth": max(1.0, inner_width),
            "innerHeight": max(1.0, inner_height),
            "maxScrollY": max(0.0, max_scroll_y),
        }
    except Exception:
        return _viewport_fallback()
    return normalized
stable_clip: Optional[Dict[str, float]] = None
stable_reads = 0
previous_clip: Optional[Dict[str, float]] = None
@@ -780,45 +895,118 @@ def _capture_selector_screenshot(
f"Pillow is required for tall element capture: {exc}"
) from exc
try:
full_page_bytes = page.screenshot(
full_page=True,
timeout=timeout_ms,
type="png",
)
except Exception as exc:
raise ScreenshotError(
f"Could not capture full-page screenshot for selector '{selector_text}': {exc}"
) from exc
padding = 2.0
crop_left = max(0, int(page_rect["x"] - padding))
crop_top = max(0, int(page_rect["y"] - padding))
crop_right = max(crop_left + 1, int(page_rect["x"] + page_rect["width"] + padding + 0.9999))
crop_bottom = max(crop_top + 1, int(page_rect["y"] + page_rect["height"] + padding + 0.9999))
output_left = max(0.0, page_rect["x"] - padding)
output_top = max(0.0, page_rect["y"] - padding)
output_width = max(1, int(page_rect["width"] + (padding * 2.0) + 0.9999))
output_height = max(1, int(page_rect["height"] + (padding * 2.0) + 0.9999))
canvas_mode = "RGB" if format_name == "jpeg" else "RGBA"
canvas_bg = (255, 255, 255) if canvas_mode == "RGB" else (255, 255, 255, 0)
stitched = Image.new(canvas_mode, (output_width, output_height), canvas_bg)
stitched_bottom = 0
overlap_px = 24
step_cursor = 0
max_iterations = max(10, int((output_height / max(1.0, current_viewport_height)) * 6.0) + 12)
try:
with Image.open(io.BytesIO(full_page_bytes)) as full_page_image:
bounded_box = (
max(0, min(crop_left, full_page_image.width - 1)),
max(0, min(crop_top, full_page_image.height - 1)),
max(1, min(crop_right, full_page_image.width)),
max(1, min(crop_bottom, full_page_image.height)),
for _ in range(max_iterations):
metrics = _read_scroll_metrics()
desired_scroll_y = min(
metrics["maxScrollY"],
max(0.0, output_top + float(step_cursor)),
)
cropped = full_page_image.crop(bounded_box)
save_kwargs: Dict[str, Any] = {}
if format_name == "jpeg":
cropped = cropped.convert("RGB")
save_kwargs.update({"format": "JPEG", "quality": 90})
else:
if cropped.mode == "P":
cropped = cropped.convert("RGBA")
save_kwargs.update({"format": "PNG"})
cropped.save(destination, **save_kwargs)
page.evaluate("(y) => window.scrollTo(0, y)", desired_scroll_y)
page.wait_for_timeout(125)
try:
locator.evaluate(
"""
async () => {
await new Promise((resolve) => requestAnimationFrame(() => requestAnimationFrame(resolve)));
}
"""
)
except Exception:
pass
metrics = _read_scroll_metrics()
viewport_rect = _read_viewport_rect()
if viewport_rect is None:
continue
visible_left = max(0.0, viewport_rect["left"] - padding)
visible_top = max(0.0, viewport_rect["top"] - padding)
visible_right = min(metrics["innerWidth"], viewport_rect["right"] + padding)
visible_bottom = min(metrics["innerHeight"], viewport_rect["bottom"] + padding)
if visible_right <= visible_left or visible_bottom <= visible_top:
if metrics["scrollY"] >= metrics["maxScrollY"]:
break
step_cursor += max(1, int(metrics["innerHeight"] * 0.6))
continue
clip_box = {
"x": float(int(visible_left)),
"y": float(int(visible_top)),
"width": float(int((visible_right - visible_left) + 0.9999)),
"height": float(int((visible_bottom - visible_top) + 0.9999)),
}
piece_bytes = page.screenshot(
timeout=timeout_ms,
type="png",
clip=clip_box,
)
capture_page_x = metrics["scrollX"] + visible_left
capture_page_y = metrics["scrollY"] + visible_top
paste_x = int(round(capture_page_x - output_left))
paste_y = int(round(capture_page_y - output_top))
with Image.open(io.BytesIO(piece_bytes)) as piece_image:
if canvas_mode == "RGB":
piece = piece_image.convert("RGB")
else:
piece = piece_image.convert("RGBA")
crop_left = max(0, -paste_x)
crop_top = max(0, -paste_y)
crop_right = min(piece.width, output_width - paste_x)
crop_bottom = min(piece.height, output_height - paste_y)
if crop_right <= crop_left or crop_bottom <= crop_top:
continue
if crop_left or crop_top or crop_right != piece.width or crop_bottom != piece.height:
piece = piece.crop((crop_left, crop_top, crop_right, crop_bottom))
dest_x = max(0, paste_x + crop_left)
dest_y = max(0, paste_y + crop_top)
stitched.paste(piece, (dest_x, dest_y))
piece_bottom = dest_y + piece.height
if piece_bottom <= stitched_bottom + 1:
if metrics["scrollY"] >= metrics["maxScrollY"]:
break
step_cursor += max(1, int(metrics["innerHeight"] * 0.6))
continue
stitched_bottom = max(stitched_bottom, piece_bottom)
if stitched_bottom >= output_height:
break
step_cursor = max(0, stitched_bottom - overlap_px)
if stitched_bottom <= 0:
raise ScreenshotError(
f"Could not capture stitched slices for selector '{selector_text}'"
)
save_kwargs: Dict[str, Any] = {}
if format_name == "jpeg":
save_kwargs.update({"format": "JPEG", "quality": _jpeg_quality_from_level(quality_level)})
else:
save_kwargs.update({"format": "PNG"})
stitched.save(destination, **save_kwargs)
return
except ScreenshotError:
raise
except Exception as exc:
raise ScreenshotError(
f"Could not crop full-page screenshot for selector '{selector_text}': {exc}"
f"Could not stitch tall selector capture for '{selector_text}': {exc}"
) from exc
padding = 2.0
@@ -841,7 +1029,7 @@ def _capture_selector_screenshot(
}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
screenshot_kwargs["quality"] = _jpeg_quality_from_level(quality_level)
page.screenshot(**screenshot_kwargs)
@@ -852,6 +1040,7 @@ def _convert_to_webp(
*,
quality: int = 90,
method: int = 6,
lossless: bool = False,
max_dim: int = WEBP_MAX_DIM,
downscale_if_oversize: bool = True,
) -> bool:
@@ -883,6 +1072,7 @@ def _convert_to_webp(
"format": "WEBP",
"quality": int(quality),
"method": int(method),
"lossless": bool(lossless),
}
# Preserve alpha when present; Pillow handles it for WEBP.
@@ -1188,6 +1378,7 @@ def _capture(
[
("url", options.url),
("format", _normalize_format(options.output_format)),
("quality", options.quality),
("browser", getattr(defaults, "browser", "unknown") if defaults else "unknown"),
("headless", getattr(defaults, "headless", "unknown") if defaults else "unknown"),
(
@@ -1219,7 +1410,11 @@ def _capture(
element_captured = False
if options.interactive_pick and format_name != "pdf":
selected_selector = ""
with tool.open_page(headless=picker_headless) as page:
with tool.open_page(
headless=picker_headless,
emulate_viewport=picker_headless,
start_maximized=not picker_headless,
) as page:
navigation_status = _prepare_capture_page(
tool,
page,
@@ -1255,6 +1450,7 @@ def _capture(
destination,
format_name,
options.selector_timeout_ms,
options.quality,
)
element_captured = True
else:
@@ -1285,6 +1481,7 @@ def _capture(
destination,
format_name,
options.selector_timeout_ms,
options.quality,
)
element_captured = True
capture_mode = "selector"
@@ -1310,7 +1507,7 @@ def _capture(
}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
screenshot_kwargs["quality"] = _jpeg_quality_from_level(options.quality)
if options.full_page:
progress.step("capturing output")
page.screenshot(full_page=True, **screenshot_kwargs)
@@ -1404,10 +1601,22 @@ def _capture_screenshot(
if requested_format == "webp":
progress.step("capturing converting to webp")
try:
did_downscale = _convert_to_webp(capture_path, destination)
webp_settings = _webp_quality_settings(options.quality)
did_downscale = _convert_to_webp(
capture_path,
destination,
quality=int(webp_settings["quality"]),
method=int(webp_settings["method"]),
lossless=bool(webp_settings["lossless"]),
)
if did_downscale:
try:
destination.unlink(missing_ok=True)
except Exception:
pass
destination = capture_path
warnings.append(
f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
f"webp conversion required downscaling to fit {WEBP_MAX_DIM}px limit; using original png instead: {capture_path.name}"
)
else:
try:
@@ -1475,6 +1684,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
format_value = parsed.get("format")
capture_mode_value = _normalize_capture_mode(parsed.get("capture_mode"))
quality_value = _normalize_quality(parsed.get("quality"))
if not format_value:
try:
tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
@@ -1549,6 +1759,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
("urls", [u for u, _ in url_to_process]),
("archive", archive_enabled),
("format", format_name),
("quality", quality_value),
("capture_mode", capture_mode_value or ("interactive" if interactive_default and format_name != "pdf" else "auto")),
("output_dir", screenshot_dir),
("output_dir_source", screenshot_dir_source),
@@ -1622,6 +1833,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
wait_for_article=False,
full_page=True,
interactive_pick=False,
quality=quality_value,
playwright_tool=shared_playwright_tool,
)
@@ -1731,7 +1943,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
usage="screen-shot <url> [options] [-query \"format:full\"]",
usage="screen-shot <url> [options] [-query \"format:full quality:10\"]",
alias=["screenshot",
"ss"],
arg=[
@@ -1748,6 +1960,12 @@ CMDLET = Cmdlet(
query_only=True,
description="Capture mode via -query, e.g. format:full or format:interactive"
),
sh.QueryArg(
"quality",
key="quality",
query_only=True,
description="Screenshot quality via -query, 1-10. 10 uses highest quality and lossless webp."
),
CmdletArg(
name="selector",
type="string",
@@ -1762,6 +1980,7 @@ CMDLET = Cmdlet(
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
"Interactive single-URL runs open a headful browser picker by default so you can hover and click the element to capture.",
"Use -query \"format:full\" to bypass the picker and capture the full page directly.",
"Use -query \"quality:1\" through \"quality:10\" to control jpeg/webp compression. quality:10 uses lossless webp.",
],
)
+14 -5
View File
@@ -274,6 +274,8 @@ class PlaywrightTool:
viewport_height: Optional[int] = None,
ignore_https_errors: Optional[bool] = None,
accept_downloads: bool = False,
emulate_viewport: bool = True,
start_maximized: bool = False,
) -> Iterator[Any]:
"""Context manager yielding a Playwright page with sane defaults."""
self.require()
@@ -314,19 +316,26 @@ class PlaywrightTool:
if browser_type is None:
browser_type = pw.chromium
launch_args = ["--disable-blink-features=AutomationControlled"]
if bool(start_maximized) and not h:
launch_args.append("--start-maximized")
browser = browser_type.launch(
headless=h,
args=["--disable-blink-features=AutomationControlled"],
args=launch_args,
)
context_kwargs: Dict[str,
Any] = {
"viewport": {
"width": vw,
"height": vh
},
"ignore_https_errors": ihe,
"accept_downloads": bool(accept_downloads),
}
if bool(emulate_viewport):
context_kwargs["viewport"] = {
"width": vw,
"height": vh,
}
else:
context_kwargs["no_viewport"] = True
if ua_value is not None:
context_kwargs["user_agent"] = ua_value