hkjh
This commit is contained in:
@@ -323,6 +323,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
|
||||
try:
|
||||
tool = options.playwright_tool or PlaywrightTool({})
|
||||
|
||||
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
|
||||
try:
|
||||
current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
|
||||
if current_browser != "chromium":
|
||||
debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
|
||||
tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
|
||||
except Exception:
|
||||
tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
|
||||
|
||||
tool.debug_dump()
|
||||
|
||||
log("Launching browser...", flush=True)
|
||||
@@ -333,104 +343,114 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
if format_name == "pdf" and not options.headless:
|
||||
warnings.append("pdf output requires headless Chromium; overriding headless mode")
|
||||
|
||||
with tool.open_page(headless=headless) as page:
|
||||
log(f"Navigating to {options.url}...", flush=True)
|
||||
try:
|
||||
tool.goto(page, options.url)
|
||||
log("Page loaded successfully", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("navigation timeout; capturing current page state")
|
||||
log("Navigation timeout; proceeding with current state", flush=True)
|
||||
|
||||
# Skip article lookup by default (wait_for_article defaults to False)
|
||||
if options.wait_for_article:
|
||||
try:
|
||||
with tool.open_page(headless=headless) as page:
|
||||
log(f"Navigating to {options.url}...", flush=True)
|
||||
try:
|
||||
log("Waiting for article element...", flush=True)
|
||||
page.wait_for_selector("article", timeout=10_000)
|
||||
log("Article element found", flush=True)
|
||||
tool.goto(page, options.url)
|
||||
log("Page loaded successfully", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("<article> selector not found; capturing fallback")
|
||||
log("Article element not found; using fallback", flush=True)
|
||||
|
||||
if options.wait_after_load > 0:
|
||||
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
if options.replace_video_posters:
|
||||
log("Replacing video elements with posters...", flush=True)
|
||||
page.evaluate(
|
||||
"""
|
||||
document.querySelectorAll('video').forEach(v => {
|
||||
if (v.poster) {
|
||||
const img = document.createElement('img');
|
||||
img.src = v.poster;
|
||||
img.style.maxWidth = '100%';
|
||||
img.style.borderRadius = '12px';
|
||||
v.replaceWith(img);
|
||||
}
|
||||
});
|
||||
"""
|
||||
)
|
||||
# Attempt platform-specific target capture if requested (and not PDF)
|
||||
element_captured = False
|
||||
if options.prefer_platform_target and format_name != "pdf":
|
||||
log("Attempting platform-specific content capture...", flush=True)
|
||||
try:
|
||||
_platform_preprocess(options.url, page, warnings)
|
||||
except Exception as e:
|
||||
debug(f"[_capture] Platform preprocess failed: {e}")
|
||||
pass
|
||||
selectors = list(options.target_selectors or [])
|
||||
if not selectors:
|
||||
selectors = _selectors_for_url(options.url)
|
||||
warnings.append("navigation timeout; capturing current page state")
|
||||
log("Navigation timeout; proceeding with current state", flush=True)
|
||||
|
||||
debug(f"[_capture] Trying selectors: {selectors}")
|
||||
for sel in selectors:
|
||||
# Skip article lookup by default (wait_for_article defaults to False)
|
||||
if options.wait_for_article:
|
||||
try:
|
||||
log(f"Trying selector: {sel}", flush=True)
|
||||
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
|
||||
log("Waiting for article element...", flush=True)
|
||||
page.wait_for_selector("article", timeout=10_000)
|
||||
log("Article element found", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
log(f"Selector not found: {sel}", flush=True)
|
||||
continue
|
||||
warnings.append("<article> selector not found; capturing fallback")
|
||||
log("Article element not found; using fallback", flush=True)
|
||||
|
||||
if options.wait_after_load > 0:
|
||||
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
if options.replace_video_posters:
|
||||
log("Replacing video elements with posters...", flush=True)
|
||||
page.evaluate(
|
||||
"""
|
||||
document.querySelectorAll('video').forEach(v => {
|
||||
if (v.poster) {
|
||||
const img = document.createElement('img');
|
||||
img.src = v.poster;
|
||||
img.style.maxWidth = '100%';
|
||||
img.style.borderRadius = '12px';
|
||||
v.replaceWith(img);
|
||||
}
|
||||
});
|
||||
"""
|
||||
)
|
||||
# Attempt platform-specific target capture if requested (and not PDF)
|
||||
element_captured = False
|
||||
if options.prefer_platform_target and format_name != "pdf":
|
||||
log("Attempting platform-specific content capture...", flush=True)
|
||||
try:
|
||||
if el is not None:
|
||||
log(f"Found element with selector: {sel}", flush=True)
|
||||
try:
|
||||
el.scroll_into_view_if_needed(timeout=1000)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"Capturing element to {destination}...", flush=True)
|
||||
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
|
||||
element_captured = True
|
||||
log("Element captured successfully", flush=True)
|
||||
break
|
||||
except Exception as exc:
|
||||
warnings.append(f"element capture failed for '{sel}': {exc}")
|
||||
log(f"Failed to capture element: {exc}", flush=True)
|
||||
# Fallback to default capture paths
|
||||
if element_captured:
|
||||
pass
|
||||
elif format_name == "pdf":
|
||||
log("Generating PDF...", flush=True)
|
||||
page.emulate_media(media="print")
|
||||
page.pdf(path=str(destination), print_background=True)
|
||||
log(f"PDF saved to {destination}", flush=True)
|
||||
else:
|
||||
log(f"Capturing full page to {destination}...", flush=True)
|
||||
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
if options.full_page:
|
||||
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||
_platform_preprocess(options.url, page, warnings)
|
||||
except Exception as e:
|
||||
debug(f"[_capture] Platform preprocess failed: {e}")
|
||||
pass
|
||||
selectors = list(options.target_selectors or [])
|
||||
if not selectors:
|
||||
selectors = _selectors_for_url(options.url)
|
||||
|
||||
debug(f"[_capture] Trying selectors: {selectors}")
|
||||
for sel in selectors:
|
||||
try:
|
||||
log(f"Trying selector: {sel}", flush=True)
|
||||
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
|
||||
except PlaywrightTimeoutError:
|
||||
log(f"Selector not found: {sel}", flush=True)
|
||||
continue
|
||||
try:
|
||||
if el is not None:
|
||||
log(f"Found element with selector: {sel}", flush=True)
|
||||
try:
|
||||
el.scroll_into_view_if_needed(timeout=1000)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"Capturing element to {destination}...", flush=True)
|
||||
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
|
||||
element_captured = True
|
||||
log("Element captured successfully", flush=True)
|
||||
break
|
||||
except Exception as exc:
|
||||
warnings.append(f"element capture failed for '{sel}': {exc}")
|
||||
log(f"Failed to capture element: {exc}", flush=True)
|
||||
# Fallback to default capture paths
|
||||
if element_captured:
|
||||
pass
|
||||
elif format_name == "pdf":
|
||||
log("Generating PDF...", flush=True)
|
||||
page.emulate_media(media="print")
|
||||
page.pdf(path=str(destination), print_background=True)
|
||||
log(f"PDF saved to {destination}", flush=True)
|
||||
else:
|
||||
article = page.query_selector("article")
|
||||
if article is not None:
|
||||
article_kwargs = dict(screenshot_kwargs)
|
||||
article_kwargs.pop("full_page", None)
|
||||
article.screenshot(**article_kwargs)
|
||||
log(f"Capturing full page to {destination}...", flush=True)
|
||||
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
if options.full_page:
|
||||
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||
else:
|
||||
page.screenshot(**screenshot_kwargs)
|
||||
log(f"Screenshot saved to {destination}", flush=True)
|
||||
article = page.query_selector("article")
|
||||
if article is not None:
|
||||
article_kwargs = dict(screenshot_kwargs)
|
||||
article_kwargs.pop("full_page", None)
|
||||
article.screenshot(**article_kwargs)
|
||||
else:
|
||||
page.screenshot(**screenshot_kwargs)
|
||||
log(f"Screenshot saved to {destination}", flush=True)
|
||||
except Exception as exc:
|
||||
debug(f"[_capture] Exception launching browser/page: {exc}")
|
||||
msg = str(exc).lower()
|
||||
if any(k in msg for k in ["executable", "not found", "no such file", "cannot find", "install"]):
|
||||
raise ScreenshotError("Chromium Playwright browser binaries not found. Install them: python ./scripts/setup.py --playwright-only --browsers chromium") from exc
|
||||
raise
|
||||
except ScreenshotError:
|
||||
# Re-raise ScreenshotError raised intentionally (do not wrap)
|
||||
raise
|
||||
except Exception as exc:
|
||||
debug(f"[_capture] Exception: {exc}")
|
||||
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
|
||||
@@ -645,6 +665,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
try:
|
||||
# Create screenshot with provided options
|
||||
# Force the Playwright engine to Chromium for the screen-shot cmdlet
|
||||
# (this ensures consistent rendering and supports PDF output requirements).
|
||||
pw_local_cfg = {}
|
||||
if isinstance(config, dict):
|
||||
tool_block = dict(config.get("tool") or {})
|
||||
pw_block = dict(tool_block.get("playwright") or {})
|
||||
pw_block["browser"] = "chromium"
|
||||
tool_block["playwright"] = pw_block
|
||||
pw_local_cfg = dict(config)
|
||||
pw_local_cfg["tool"] = tool_block
|
||||
else:
|
||||
pw_local_cfg = {"tool": {"playwright": {"browser": "chromium"}}}
|
||||
|
||||
options = ScreenshotOptions(
|
||||
url=url,
|
||||
output_dir=screenshot_dir,
|
||||
@@ -654,7 +687,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
prefer_platform_target=False,
|
||||
wait_for_article=False,
|
||||
full_page=True,
|
||||
playwright_tool=PlaywrightTool(config),
|
||||
playwright_tool=PlaywrightTool(pw_local_cfg),
|
||||
)
|
||||
|
||||
screenshot_result = _capture_screenshot(options)
|
||||
@@ -744,12 +777,11 @@ CMDLET = Cmdlet(
|
||||
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
|
||||
|
||||
],
|
||||
detail=
|
||||
["""
|
||||
|
||||
|
||||
|
||||
"""]
|
||||
detail=[
|
||||
"Uses Playwright Chromium engine only. Install Chromium with: python ./scripts/setup.py --playwright-only --browsers chromium",
|
||||
"PDF output requires headless Chromium (the cmdlet will enforce headless mode for PDF).",
|
||||
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
|
||||
]
|
||||
)
|
||||
|
||||
CMDLET.exec = _run
|
||||
|
||||
Reference in New Issue
Block a user