This commit is contained in:
nose
2025-12-20 23:57:44 -08:00
parent b75faa49a2
commit 8ca5783970
39 changed files with 4294 additions and 1722 deletions

View File

@@ -266,27 +266,27 @@ def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
(_submit_archive_ph, "archive.ph"),
):
try:
log(f"Archiving to {label}...", flush=True)
debug(f"Archiving to {label}...")
archived = submitter(url, timeout)
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 429:
warnings.append(f"archive {label} rate limited (HTTP 429)")
log(f"{label}: Rate limited (HTTP 429)", flush=True)
debug(f"{label}: Rate limited (HTTP 429)")
else:
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
debug(f"{label}: HTTP {exc.response.status_code}")
except httpx.RequestError as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: Connection error: {exc}", flush=True)
debug(f"{label}: Connection error: {exc}")
except Exception as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: {exc}", flush=True)
debug(f"{label}: {exc}")
else:
if archived:
archives.append(archived)
log(f"{label}: Success - {archived}", flush=True)
debug(f"{label}: Success - {archived}")
else:
log(f"{label}: No archive link returned", flush=True)
debug(f"{label}: No archive link returned")
return archives, warnings
@@ -335,7 +335,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
tool.debug_dump()
log("Launching browser...", flush=True)
debug("Launching browser...")
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
@@ -345,29 +345,29 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
try:
with tool.open_page(headless=headless) as page:
log(f"Navigating to {options.url}...", flush=True)
debug(f"Navigating to {options.url}...")
try:
tool.goto(page, options.url)
log("Page loaded successfully", flush=True)
debug("Page loaded successfully")
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
debug("Navigation timeout; proceeding with current state")
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log("Waiting for article element...", flush=True)
debug("Waiting for article element...")
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
debug("Article element found")
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
debug("Article element not found; using fallback")
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
debug("Replacing video elements with posters...")
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
@@ -384,7 +384,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
debug("Attempting platform-specific content capture...")
try:
_platform_preprocess(options.url, page, warnings)
except Exception as e:
@@ -397,36 +397,36 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
debug(f"[_capture] Trying selectors: {selectors}")
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
debug(f"Trying selector: {sel}")
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
debug(f"Selector not found: {sel}")
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
debug(f"Found element with selector: {sel}")
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
debug("Element captured successfully")
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
debug(f"Failed to capture element: {exc}")
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
debug("Generating PDF...")
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
debug(f"PDF saved to {destination}")
else:
log(f"Capturing full page to {destination}...", flush=True)
debug(f"Capturing full page to {destination}...")
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
@@ -441,7 +441,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
debug(f"Screenshot saved to {destination}")
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
@@ -587,7 +587,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if storage_value:
try:
screenshot_dir = SharedArgs.resolve_storage(storage_value)
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}")
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
@@ -596,7 +596,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if screenshot_dir is None and resolve_output_dir is not None:
try:
screenshot_dir = resolve_output_dir(config)
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using config resolver: {screenshot_dir}")
except Exception:
pass
@@ -604,14 +604,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if screenshot_dir is None and config and config.get("outfile"):
try:
screenshot_dir = Path(config["outfile"]).expanduser()
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using config outfile: {screenshot_dir}")
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using default directory: {screenshot_dir}")
ensure_directory(screenshot_dir)
@@ -693,11 +693,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
screenshot_result = _capture_screenshot(options)
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
debug(f"Screenshot captured to {screenshot_result.path}")
if screenshot_result.archive_url:
log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True)
debug(f"Archives: {', '.join(screenshot_result.archive_url)}")
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
debug(f"Warning: {warning}")
# Compute hash of screenshot file
screenshot_hash = None
@@ -762,8 +762,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"No screenshots were successfully captured", file=sys.stderr)
return 1
# Log completion message
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
# Log completion message (keep this as normal output)
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
return exit_code
CMDLET = Cmdlet(