fixed screenshot capture and conversion
This commit is contained in:
+257
-38
@@ -135,6 +135,7 @@ class ScreenshotOptions:
|
||||
selector_timeout_ms: int = 10_000
|
||||
interactive_pick: bool = False
|
||||
interactive_pick_timeout_s: float = 120.0
|
||||
quality: int = 8
|
||||
playwright_tool: Optional[PlaywrightTool] = None
|
||||
|
||||
|
||||
@@ -280,6 +281,36 @@ def _normalize_capture_mode(value: Optional[str]) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _normalize_quality(value: Any) -> int:
|
||||
try:
|
||||
quality = int(str(value).strip())
|
||||
except Exception:
|
||||
quality = 8
|
||||
return max(1, min(10, quality))
|
||||
|
||||
|
||||
def _jpeg_quality_from_level(level: int) -> int:
    """Translate a 1-10 quality level into a JPEG ``quality`` value.

    The level is first passed through ``_normalize_quality``. Levels 1-9 map
    linearly onto 45-93 in steps of 6; level 10 maps to 100.
    """
    step = _normalize_quality(level)
    # The top level jumps straight to the maximum instead of continuing
    # the linear ramp.
    return 100 if step >= 10 else 45 + ((step - 1) * 6)
|
||||
|
||||
|
||||
def _webp_quality_settings(level: int) -> Dict[str, Any]:
    """Build the WebP encoder settings for a 1-10 quality level.

    The level is first passed through ``_normalize_quality``. Level 10 selects
    lossless encoding at quality 100; lower levels use lossy encoding with the
    quality ramping from 45 (level 1) to 93 (level 9). ``method`` is always 6.

    Returns:
        A dict with the keys ``"quality"``, ``"method"`` and ``"lossless"``.
    """
    step = _normalize_quality(level)
    use_lossless = step >= 10
    return {
        "quality": 100 if use_lossless else 45 + ((step - 1) * 6),
        "method": 6,
        "lossless": use_lossless,
    }
|
||||
|
||||
|
||||
def _stdin_interactive() -> bool:
|
||||
try:
|
||||
return bool(sys.stdin and sys.stdin.isatty())
|
||||
@@ -589,6 +620,7 @@ def _capture_selector_screenshot(
|
||||
destination: Path,
|
||||
format_name: str,
|
||||
selector_timeout_ms: int,
|
||||
quality_level: int,
|
||||
) -> None:
|
||||
selector_text = str(selector or "").strip()
|
||||
if not selector_text:
|
||||
@@ -697,6 +729,89 @@ def _capture_selector_screenshot(
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _read_viewport_rect() -> Optional[Dict[str, float]]:
|
||||
try:
|
||||
rect_value = locator.evaluate(
|
||||
"""
|
||||
(element) => {
|
||||
const rect = element.getBoundingClientRect();
|
||||
return {
|
||||
left: rect.left,
|
||||
top: rect.top,
|
||||
right: rect.right,
|
||||
bottom: rect.bottom,
|
||||
width: rect.width,
|
||||
height: rect.height,
|
||||
};
|
||||
}
|
||||
"""
|
||||
)
|
||||
except Exception:
|
||||
rect_value = None
|
||||
if not isinstance(rect_value, dict):
|
||||
return None
|
||||
try:
|
||||
return {
|
||||
"left": float(rect_value.get("left") or 0.0),
|
||||
"top": float(rect_value.get("top") or 0.0),
|
||||
"right": float(rect_value.get("right") or 0.0),
|
||||
"bottom": float(rect_value.get("bottom") or 0.0),
|
||||
"width": max(1.0, float(rect_value.get("width") or 0.0)),
|
||||
"height": max(1.0, float(rect_value.get("height") or 0.0)),
|
||||
}
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _read_scroll_metrics() -> Dict[str, float]:
|
||||
try:
|
||||
metrics_value = page.evaluate(
|
||||
"""
|
||||
() => {
|
||||
const root = document.documentElement || document.body;
|
||||
const body = document.body;
|
||||
const scrollHeight = Math.max(
|
||||
root ? root.scrollHeight || 0 : 0,
|
||||
body ? body.scrollHeight || 0 : 0,
|
||||
);
|
||||
const innerWidth = window.innerWidth || 0;
|
||||
const innerHeight = window.innerHeight || 0;
|
||||
return {
|
||||
scrollX: window.scrollX || window.pageXOffset || 0,
|
||||
scrollY: window.scrollY || window.pageYOffset || 0,
|
||||
innerWidth,
|
||||
innerHeight,
|
||||
maxScrollY: Math.max(0, scrollHeight - innerHeight),
|
||||
};
|
||||
}
|
||||
"""
|
||||
)
|
||||
except Exception:
|
||||
metrics_value = None
|
||||
if not isinstance(metrics_value, dict):
|
||||
return {
|
||||
"scrollX": 0.0,
|
||||
"scrollY": 0.0,
|
||||
"innerWidth": max(1.0, current_viewport_width),
|
||||
"innerHeight": max(1.0, current_viewport_height),
|
||||
"maxScrollY": 0.0,
|
||||
}
|
||||
try:
|
||||
return {
|
||||
"scrollX": max(0.0, float(metrics_value.get("scrollX") or 0.0)),
|
||||
"scrollY": max(0.0, float(metrics_value.get("scrollY") or 0.0)),
|
||||
"innerWidth": max(1.0, float(metrics_value.get("innerWidth") or current_viewport_width or 1.0)),
|
||||
"innerHeight": max(1.0, float(metrics_value.get("innerHeight") or current_viewport_height or 1.0)),
|
||||
"maxScrollY": max(0.0, float(metrics_value.get("maxScrollY") or 0.0)),
|
||||
}
|
||||
except Exception:
|
||||
return {
|
||||
"scrollX": 0.0,
|
||||
"scrollY": 0.0,
|
||||
"innerWidth": max(1.0, current_viewport_width),
|
||||
"innerHeight": max(1.0, current_viewport_height),
|
||||
"maxScrollY": 0.0,
|
||||
}
|
||||
|
||||
stable_clip: Optional[Dict[str, float]] = None
|
||||
stable_reads = 0
|
||||
previous_clip: Optional[Dict[str, float]] = None
|
||||
@@ -780,45 +895,118 @@ def _capture_selector_screenshot(
|
||||
f"Pillow is required for tall element capture: {exc}"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
full_page_bytes = page.screenshot(
|
||||
full_page=True,
|
||||
timeout=timeout_ms,
|
||||
type="png",
|
||||
)
|
||||
except Exception as exc:
|
||||
raise ScreenshotError(
|
||||
f"Could not capture full-page screenshot for selector '{selector_text}': {exc}"
|
||||
) from exc
|
||||
|
||||
padding = 2.0
|
||||
crop_left = max(0, int(page_rect["x"] - padding))
|
||||
crop_top = max(0, int(page_rect["y"] - padding))
|
||||
crop_right = max(crop_left + 1, int(page_rect["x"] + page_rect["width"] + padding + 0.9999))
|
||||
crop_bottom = max(crop_top + 1, int(page_rect["y"] + page_rect["height"] + padding + 0.9999))
|
||||
output_left = max(0.0, page_rect["x"] - padding)
|
||||
output_top = max(0.0, page_rect["y"] - padding)
|
||||
output_width = max(1, int(page_rect["width"] + (padding * 2.0) + 0.9999))
|
||||
output_height = max(1, int(page_rect["height"] + (padding * 2.0) + 0.9999))
|
||||
canvas_mode = "RGB" if format_name == "jpeg" else "RGBA"
|
||||
canvas_bg = (255, 255, 255) if canvas_mode == "RGB" else (255, 255, 255, 0)
|
||||
stitched = Image.new(canvas_mode, (output_width, output_height), canvas_bg)
|
||||
stitched_bottom = 0
|
||||
overlap_px = 24
|
||||
step_cursor = 0
|
||||
max_iterations = max(10, int((output_height / max(1.0, current_viewport_height)) * 6.0) + 12)
|
||||
|
||||
try:
|
||||
with Image.open(io.BytesIO(full_page_bytes)) as full_page_image:
|
||||
bounded_box = (
|
||||
max(0, min(crop_left, full_page_image.width - 1)),
|
||||
max(0, min(crop_top, full_page_image.height - 1)),
|
||||
max(1, min(crop_right, full_page_image.width)),
|
||||
max(1, min(crop_bottom, full_page_image.height)),
|
||||
for _ in range(max_iterations):
|
||||
metrics = _read_scroll_metrics()
|
||||
desired_scroll_y = min(
|
||||
metrics["maxScrollY"],
|
||||
max(0.0, output_top + float(step_cursor)),
|
||||
)
|
||||
cropped = full_page_image.crop(bounded_box)
|
||||
save_kwargs: Dict[str, Any] = {}
|
||||
if format_name == "jpeg":
|
||||
cropped = cropped.convert("RGB")
|
||||
save_kwargs.update({"format": "JPEG", "quality": 90})
|
||||
else:
|
||||
if cropped.mode == "P":
|
||||
cropped = cropped.convert("RGBA")
|
||||
save_kwargs.update({"format": "PNG"})
|
||||
cropped.save(destination, **save_kwargs)
|
||||
page.evaluate("(y) => window.scrollTo(0, y)", desired_scroll_y)
|
||||
page.wait_for_timeout(125)
|
||||
try:
|
||||
locator.evaluate(
|
||||
"""
|
||||
async () => {
|
||||
await new Promise((resolve) => requestAnimationFrame(() => requestAnimationFrame(resolve)));
|
||||
}
|
||||
"""
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
metrics = _read_scroll_metrics()
|
||||
viewport_rect = _read_viewport_rect()
|
||||
if viewport_rect is None:
|
||||
continue
|
||||
|
||||
visible_left = max(0.0, viewport_rect["left"] - padding)
|
||||
visible_top = max(0.0, viewport_rect["top"] - padding)
|
||||
visible_right = min(metrics["innerWidth"], viewport_rect["right"] + padding)
|
||||
visible_bottom = min(metrics["innerHeight"], viewport_rect["bottom"] + padding)
|
||||
if visible_right <= visible_left or visible_bottom <= visible_top:
|
||||
if metrics["scrollY"] >= metrics["maxScrollY"]:
|
||||
break
|
||||
step_cursor += max(1, int(metrics["innerHeight"] * 0.6))
|
||||
continue
|
||||
|
||||
clip_box = {
|
||||
"x": float(int(visible_left)),
|
||||
"y": float(int(visible_top)),
|
||||
"width": float(int((visible_right - visible_left) + 0.9999)),
|
||||
"height": float(int((visible_bottom - visible_top) + 0.9999)),
|
||||
}
|
||||
piece_bytes = page.screenshot(
|
||||
timeout=timeout_ms,
|
||||
type="png",
|
||||
clip=clip_box,
|
||||
)
|
||||
|
||||
capture_page_x = metrics["scrollX"] + visible_left
|
||||
capture_page_y = metrics["scrollY"] + visible_top
|
||||
paste_x = int(round(capture_page_x - output_left))
|
||||
paste_y = int(round(capture_page_y - output_top))
|
||||
|
||||
with Image.open(io.BytesIO(piece_bytes)) as piece_image:
|
||||
if canvas_mode == "RGB":
|
||||
piece = piece_image.convert("RGB")
|
||||
else:
|
||||
piece = piece_image.convert("RGBA")
|
||||
|
||||
crop_left = max(0, -paste_x)
|
||||
crop_top = max(0, -paste_y)
|
||||
crop_right = min(piece.width, output_width - paste_x)
|
||||
crop_bottom = min(piece.height, output_height - paste_y)
|
||||
if crop_right <= crop_left or crop_bottom <= crop_top:
|
||||
continue
|
||||
if crop_left or crop_top or crop_right != piece.width or crop_bottom != piece.height:
|
||||
piece = piece.crop((crop_left, crop_top, crop_right, crop_bottom))
|
||||
dest_x = max(0, paste_x + crop_left)
|
||||
dest_y = max(0, paste_y + crop_top)
|
||||
stitched.paste(piece, (dest_x, dest_y))
|
||||
piece_bottom = dest_y + piece.height
|
||||
|
||||
if piece_bottom <= stitched_bottom + 1:
|
||||
if metrics["scrollY"] >= metrics["maxScrollY"]:
|
||||
break
|
||||
step_cursor += max(1, int(metrics["innerHeight"] * 0.6))
|
||||
continue
|
||||
|
||||
stitched_bottom = max(stitched_bottom, piece_bottom)
|
||||
if stitched_bottom >= output_height:
|
||||
break
|
||||
step_cursor = max(0, stitched_bottom - overlap_px)
|
||||
|
||||
if stitched_bottom <= 0:
|
||||
raise ScreenshotError(
|
||||
f"Could not capture stitched slices for selector '{selector_text}'"
|
||||
)
|
||||
|
||||
save_kwargs: Dict[str, Any] = {}
|
||||
if format_name == "jpeg":
|
||||
save_kwargs.update({"format": "JPEG", "quality": _jpeg_quality_from_level(quality_level)})
|
||||
else:
|
||||
save_kwargs.update({"format": "PNG"})
|
||||
stitched.save(destination, **save_kwargs)
|
||||
return
|
||||
except ScreenshotError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise ScreenshotError(
|
||||
f"Could not crop full-page screenshot for selector '{selector_text}': {exc}"
|
||||
f"Could not stitch tall selector capture for '{selector_text}': {exc}"
|
||||
) from exc
|
||||
|
||||
padding = 2.0
|
||||
@@ -841,7 +1029,7 @@ def _capture_selector_screenshot(
|
||||
}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
screenshot_kwargs["quality"] = _jpeg_quality_from_level(quality_level)
|
||||
|
||||
page.screenshot(**screenshot_kwargs)
|
||||
|
||||
@@ -852,6 +1040,7 @@ def _convert_to_webp(
|
||||
*,
|
||||
quality: int = 90,
|
||||
method: int = 6,
|
||||
lossless: bool = False,
|
||||
max_dim: int = WEBP_MAX_DIM,
|
||||
downscale_if_oversize: bool = True,
|
||||
) -> bool:
|
||||
@@ -883,6 +1072,7 @@ def _convert_to_webp(
|
||||
"format": "WEBP",
|
||||
"quality": int(quality),
|
||||
"method": int(method),
|
||||
"lossless": bool(lossless),
|
||||
}
|
||||
|
||||
# Preserve alpha when present; Pillow handles it for WEBP.
|
||||
@@ -1188,6 +1378,7 @@ def _capture(
|
||||
[
|
||||
("url", options.url),
|
||||
("format", _normalize_format(options.output_format)),
|
||||
("quality", options.quality),
|
||||
("browser", getattr(defaults, "browser", "unknown") if defaults else "unknown"),
|
||||
("headless", getattr(defaults, "headless", "unknown") if defaults else "unknown"),
|
||||
(
|
||||
@@ -1219,7 +1410,11 @@ def _capture(
|
||||
element_captured = False
|
||||
if options.interactive_pick and format_name != "pdf":
|
||||
selected_selector = ""
|
||||
with tool.open_page(headless=picker_headless) as page:
|
||||
with tool.open_page(
|
||||
headless=picker_headless,
|
||||
emulate_viewport=picker_headless,
|
||||
start_maximized=not picker_headless,
|
||||
) as page:
|
||||
navigation_status = _prepare_capture_page(
|
||||
tool,
|
||||
page,
|
||||
@@ -1255,6 +1450,7 @@ def _capture(
|
||||
destination,
|
||||
format_name,
|
||||
options.selector_timeout_ms,
|
||||
options.quality,
|
||||
)
|
||||
element_captured = True
|
||||
else:
|
||||
@@ -1285,6 +1481,7 @@ def _capture(
|
||||
destination,
|
||||
format_name,
|
||||
options.selector_timeout_ms,
|
||||
options.quality,
|
||||
)
|
||||
element_captured = True
|
||||
capture_mode = "selector"
|
||||
@@ -1310,7 +1507,7 @@ def _capture(
|
||||
}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
screenshot_kwargs["quality"] = _jpeg_quality_from_level(options.quality)
|
||||
if options.full_page:
|
||||
progress.step("capturing output")
|
||||
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||
@@ -1404,10 +1601,22 @@ def _capture_screenshot(
|
||||
if requested_format == "webp":
|
||||
progress.step("capturing converting to webp")
|
||||
try:
|
||||
did_downscale = _convert_to_webp(capture_path, destination)
|
||||
webp_settings = _webp_quality_settings(options.quality)
|
||||
did_downscale = _convert_to_webp(
|
||||
capture_path,
|
||||
destination,
|
||||
quality=int(webp_settings["quality"]),
|
||||
method=int(webp_settings["method"]),
|
||||
lossless=bool(webp_settings["lossless"]),
|
||||
)
|
||||
if did_downscale:
|
||||
try:
|
||||
destination.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
destination = capture_path
|
||||
warnings.append(
|
||||
f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
|
||||
f"webp conversion required downscaling to fit {WEBP_MAX_DIM}px limit; using original png instead: {capture_path.name}"
|
||||
)
|
||||
else:
|
||||
try:
|
||||
@@ -1475,6 +1684,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
format_value = parsed.get("format")
|
||||
capture_mode_value = _normalize_capture_mode(parsed.get("capture_mode"))
|
||||
quality_value = _normalize_quality(parsed.get("quality"))
|
||||
if not format_value:
|
||||
try:
|
||||
tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
|
||||
@@ -1549,6 +1759,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
("urls", [u for u, _ in url_to_process]),
|
||||
("archive", archive_enabled),
|
||||
("format", format_name),
|
||||
("quality", quality_value),
|
||||
("capture_mode", capture_mode_value or ("interactive" if interactive_default and format_name != "pdf" else "auto")),
|
||||
("output_dir", screenshot_dir),
|
||||
("output_dir_source", screenshot_dir_source),
|
||||
@@ -1622,6 +1833,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
wait_for_article=False,
|
||||
full_page=True,
|
||||
interactive_pick=False,
|
||||
quality=quality_value,
|
||||
playwright_tool=shared_playwright_tool,
|
||||
)
|
||||
|
||||
@@ -1731,7 +1943,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
CMDLET = Cmdlet(
|
||||
name="screen-shot",
|
||||
summary="Capture a website screenshot",
|
||||
usage="screen-shot <url> [options] [-query \"format:full\"]",
|
||||
usage="screen-shot <url> [options] [-query \"format:full quality:10\"]",
|
||||
alias=["screenshot",
|
||||
"ss"],
|
||||
arg=[
|
||||
@@ -1748,6 +1960,12 @@ CMDLET = Cmdlet(
|
||||
query_only=True,
|
||||
description="Capture mode via -query, e.g. format:full or format:interactive"
|
||||
),
|
||||
sh.QueryArg(
|
||||
"quality",
|
||||
key="quality",
|
||||
query_only=True,
|
||||
description="Screenshot quality via -query, 1-10. 10 uses highest quality and lossless webp."
|
||||
),
|
||||
CmdletArg(
|
||||
name="selector",
|
||||
type="string",
|
||||
@@ -1762,6 +1980,7 @@ CMDLET = Cmdlet(
|
||||
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
|
||||
"Interactive single-URL runs open a headful browser picker by default so you can hover and click the element to capture.",
|
||||
"Use -query \"format:full\" to bypass the picker and capture the full page directly.",
|
||||
"Use -query \"quality:1\" through \"quality:10\" to control jpeg/webp compression. quality:10 uses lossless webp.",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
+14
-5
@@ -274,6 +274,8 @@ class PlaywrightTool:
|
||||
viewport_height: Optional[int] = None,
|
||||
ignore_https_errors: Optional[bool] = None,
|
||||
accept_downloads: bool = False,
|
||||
emulate_viewport: bool = True,
|
||||
start_maximized: bool = False,
|
||||
) -> Iterator[Any]:
|
||||
"""Context manager yielding a Playwright page with sane defaults."""
|
||||
self.require()
|
||||
@@ -314,19 +316,26 @@ class PlaywrightTool:
|
||||
if browser_type is None:
|
||||
browser_type = pw.chromium
|
||||
|
||||
launch_args = ["--disable-blink-features=AutomationControlled"]
|
||||
if bool(start_maximized) and not h:
|
||||
launch_args.append("--start-maximized")
|
||||
|
||||
browser = browser_type.launch(
|
||||
headless=h,
|
||||
args=["--disable-blink-features=AutomationControlled"],
|
||||
args=launch_args,
|
||||
)
|
||||
context_kwargs: Dict[str,
|
||||
Any] = {
|
||||
"viewport": {
|
||||
"width": vw,
|
||||
"height": vh
|
||||
},
|
||||
"ignore_https_errors": ihe,
|
||||
"accept_downloads": bool(accept_downloads),
|
||||
}
|
||||
if bool(emulate_viewport):
|
||||
context_kwargs["viewport"] = {
|
||||
"width": vw,
|
||||
"height": vh,
|
||||
}
|
||||
else:
|
||||
context_kwargs["no_viewport"] = True
|
||||
if ua_value is not None:
|
||||
context_kwargs["user_agent"] = ua_value
|
||||
|
||||
|
||||
Reference in New Issue
Block a user